diff options
author | Kazutomo Yoshii <kazutomo@mcs.anl.gov> | 2010-04-19 09:44:29 -0500 |
---|---|---|
committer | Eric Van Hensbergen <ericvh@gmail.com> | 2011-02-15 15:29:49 -0600 |
commit | bee9f329eeef6c8eb95c35de4c5d22a0c05a1b3e (patch) | |
tree | 82717e09cbc58ea3671f9940daeb93dbd5e63524 | |
parent | 8d7bff2d72660d9d60aa371ae3d1356bbf329a09 (diff) | |
download | bluegene-ibm-cn-2.6.29.1.tar.gz |
IBM CN patch (tag: ibm-cn-2.6.29.1)
116 files changed, 52012 insertions, 107 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 74cc312c347cf8..9900af0e37773f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -510,6 +510,13 @@ config CMDLINE some command-line options at build time by entering them here. In most cases you will need to specify the root device here. +config WRAP_COPY_TOFROM_USER + bool "C-language wrapper for copy to/from user" + default n + help + Set this if you want to instrument the low-level function which block-copies data + between user-space and kernel-space + config EXTRA_TARGETS string "Additional default image types" help diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 72d17f50e54fa7..fc68dca9e91d73 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -125,7 +125,9 @@ ifeq ($(CONFIG_FUNCTION_TRACER),y) KBUILD_CFLAGS += -mno-sched-epilog endif -cpu-as-$(CONFIG_4xx) += -Wa,-m405 +ifndef CONFIG_BGP +cpu-as-$(CONFIG_4xx) += -Wa,-m450 +endif cpu-as-$(CONFIG_6xx) += -Wa,-maltivec cpu-as-$(CONFIG_POWER4) += -Wa,-maltivec cpu-as-$(CONFIG_E500) += -Wa,-me500 @@ -151,6 +153,7 @@ core-y += arch/powerpc/kernel/ \ core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ core-$(CONFIG_XMON) += arch/powerpc/xmon/ core-$(CONFIG_KVM) += arch/powerpc/kvm/ +core-$(CONFIG_BGP) += arch/powerpc/syslib/bgdd/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e84df338ea2987..5e09e7e0658b7d 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -39,6 +39,7 @@ DTS_FLAGS ?= -p 1024 $(obj)/4xx.o: BOOTCFLAGS += -mcpu=405 $(obj)/ebony.o: BOOTCFLAGS += -mcpu=405 +$(obj)/bgp.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405 @@ -60,7 +61,7 @@ src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \ gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \ 4xx.c ebony.c 
mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c bamboo.c \ cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \ - fsl-soc.c mpc8xx.c pq2.c + fsl-soc.c mpc8xx.c pq2.c bgp.c src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \ cuboot-ebony.c treeboot-ebony.c prpmc2800.c \ ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \ @@ -193,6 +194,7 @@ image-$(CONFIG_PPC_PSERIES) += zImage.pseries image-$(CONFIG_PPC_MAPLE) += zImage.pseries image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries image-$(CONFIG_PPC_PS3) += dtbImage.ps3 +image-$(CONFIG_BGP) += dtbImage.bgp image-$(CONFIG_PPC_CELLEB) += zImage.pseries image-$(CONFIG_PPC_CELL_QPACE) += zImage.pseries image-$(CONFIG_PPC_CHRP) += zImage.chrp diff --git a/arch/powerpc/boot/bgcns.h b/arch/powerpc/boot/bgcns.h new file mode 100644 index 00000000000000..238ad401a3cbfb --- /dev/null +++ b/arch/powerpc/boot/bgcns.h @@ -0,0 +1,1060 @@ +/* + * (C) Copyright IBM Corp. 2007, 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Tom Gooding, IBM + */ + + +#ifndef _BGCNS_H +#define _BGCNS_H + + +#ifndef __ASSEMBLY__ + +/*! @page CNS Common Node Services + * + * @section CNS_S10 Overview + * + * As the name implies, the <b>Common Node Services (CNS)</b> layer provides @b services + * to the kernel. 
These services may be simple queries abstracting various node + * specific data (such as DDR size) or they may be more sophisticated software services, + * such as common machine check handling. Additionally, some services may be implicit, + * such as the initialization of various hardware devices unique to Blue Gene, such as + * Netbus and SerDes. + * + * Services are not directly linked into the kernel, but rather are invoked from kernel + * code via a <b>service directory</b> which is itself part of an overall <b>service + * descriptor</b>. This service descriptor is constructed during initialization and + * is passed to the kernel when the kernel is booted. The service directory is a + * collection of <b>service references</b>. + * + * During partition (block) booting, ELF images are loaded onto the compute and I/O nodes. + * The bootloader (@i aka microloader) boots first and then transfers control to the Common + * Node Services layer so that it, in turn, may boot. + * + * Once the CNS layer has booted, control is transferred to the kernel so that it may also + * boot. All services provided by the CNS layer are immediately available at this time. + * + * @section CNS_S20 Programming Model + * + * A kernel running on top of the CNS layer is not statically linked to the common services. + * Instead, the services are called via function pointers provided by the services descriptor, + * which is described here: @ref _BGCNS_ServiceDirectory. + * + * The kernel must operate under the following rules and restrictions: + * @li The kernel must not alter the services descriptor. The descriptor must be treated as a read-only + * data structure even though the kernel may have the ability to alter it. Because CNS trusts the + * kernel, this also implies that the kernel must not expose the descriptor to any untrusted + * software (such as application code). + * @li The kernel must ensure that the CNS virtual memory region is mapped prior to invoking any + * service. 
+ * @li The kernel must ensure that any data passed to services via parameters is mapped. + * Specifically, TLB entries must be mapped as shared (TID = 0) and must be either readable + * (input parameters) or readable and writeable (output parameters). + * @li The kernel must treat the virtual address range (@ref _BGCNS_Descriptor::baseVirtualAddress , + * _BGCNS_Descriptor::baseVirtualAddress + @ref _BGCNS_Descriptor::size - 1) as reserved. + * That is, the kernel must not use this region of virtual memory for anything besides accessing + * the services descriptor. + * @li The kernel must treat the physical address range (@ref _BGCNS_Descriptor::basePhysicalAddress, + * _BGCNS_Descriptor::basePhysicalAddress + _BGCNS_Descriptor::size - 1) as reserved. The + * kernel must not map this memory for any other use. + * @li The kernel must not access any of the reserved virtual address regions with TLB settings that + * are different from those used by CNS. The kernel is allowed to unmap any of the reserved + * memory TLBs for its own use. However, in such a case and per the rule above, the kernel must + * ensure that the region is mapped prior to using any CNS facilities (such as invoking a service). + * @li CNS may need to map one or more TLB entries in order to access Blue Gene devices. In such a case, + * CNS may borrow TLB entries; the TLB will be returned to its original state before the service returns + * control to the invoking kernel. Kernels may avoid this behavior for specific devices by using + * the mapDevice service. + * @li The kernel's ELF image must avoid the 256K region of memory between 0x07000000 and 0x0703FFFF. This + * region is used for the pre-relocated CNS image and will be available for general use once CNS boot + * is complete. + * @li The kernel must not alter any reserved SPRs, DCRs or memory-mapped device registers. + * + * The CNS software may behave unpredictably if any of these rules and restrictions is violated. 
+ * + * Kernels may make the following assumptions about CNS: + * + * @li The data passed in the firmware descriptor (see below) is static. Specifically, the base addresses, + * size and service directory will not change once CNS boot is complete. + * + * @subsection CNS_21 Programming Examples + * + * @subsubsection CNS_211 Obtaining the Personality + * + * The following example shows how to fetch a copy of the Blue Gene personality structure and also + * serves as a simple example of invoking a service: + * + * @code + * + * BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time + * _BGP_Personality_t* pers = (_BGP_Personality_t*)(*descr->services->getPersonalityData)(); + * ... + * @endcode + * + * The programming model guarantees that the descriptor is static. Thus, one can provide a + * convenience method to make service invocation a little more readable + * + * @code + * + * + * static BGCNS_Descriptor* _cns_descriptor = ...; // obtained from CNS at boot time + * + * inline BGCNS_ServiceDirectory* cns() { return _cns_descriptor->services; } + * + * void foo() { + * _BGP_Personality_t* pers = (_BGP_Personality_t*)cns()->getPersonalityData(); + * ... + * } + * + * @endcode + * + * This style will be used in all of the subsequent examples. + * + * @subsubsection CNS_212 SMP Initialization + * + * Common Node Services will launch the kernel on a single core (typically core 0) and will + * leave the remaining cores parked. The kernel can activate additional cores via the @c takeCPU + * service. Here is a very simple example of such kernel code: + * + * @code + * + * void anEntryPoint(unsigned core, void* arg_not_used) { + * // Do whatever your kernel needs to do here. Typically, + * // this function never returns. You will arrive here + * // when takeCPU is invoked (below). + * } + * + * void someCodeOnTheMainThread() { + * + * // ... 
+ * + * unsigned N = cns()->getNumberOfCores(); + * + * for (core = 1; core < N; core++) { + * if ( cns()->takeCPU(core, NULL, &anEntryPoint) != 0 ) { + * // error handling goes here + * } + * } + * + * // ... + * } + * + * @endcode + * + * @subsubsection CNS_213 Version Compatibility + * + * Common Node Services structures and APIs should remain compatible within maintenance + * releases and e-fixes. Kernel's may add a runtime check to ensure that the version + * of CNS is compatible with the version from compile time. This is done as follows: + * + * @code + * + * BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time + * + * if ( ! BGCNS_IS_COMPATIBLE(descr) ) { + * // incompatible CNS (panic?) + * } + * + * @endcode + * + * @subsubsection CNS_23 Interrupts + * + * A kernel wanting to use the CNS interrupt services would first have to enable interrupts + * for the appropriate Blue Gene BIC group and IRQ within that group. This would likely be + * done at boot time. So, for example, such a kernel could enable interrupts for the Universal + * Performance Counter (group 5, IRQ 2) to be handled by the non-critical handler of core 0 as + * follows: + * + * @code + * cns()->enableInterrupt(5, 2, BGCNS_NonCritical, 0); + * @endcode + * + * Such a kernel might also maintain a collection of routines that act as subhandlers of the + * non-critical interrupt handler. In this example, we'll assume it is simply a two + * dimensional array indexed by group and IRQ: + * + * @code + * subhandlers[5][2] = &theUpcSubHandler; + * @endcode + * + * That kernel's non-critical interrupt handler would then typically handle all interrupts by + * successively invoking the getInterrupt() service to determine the group and IRQ, and then + * dispatching the appropriate subhandler. 
Additionally, the interrupt will be acknowledged + * so to avoid continuous interruption: + * + * @code + * unsigned grp, irq; + * + * while ( cns()->getInterrupt(&g, &i, BGCNS_NonCritical) == 0) { + * (*subhandlers[g][i])(); // dispatch the handler + * cns()->acknowledgeInterrupt(g,i); // ack the interrupt + * } + * @endcode + * + * @subsubsection CNS_24 Global Barriers and Interrupts + * + * The Blue Gene/P Global Interrupt Controller (aka GLINT) provides 4 independent channels + * that may be configured as either a global barrier or a global interrupt. + * + * Barriers are constructed by invoking the barrier service: + * + * @code + * unsigned channel = 0; + * + * // synchronize: + * int reset = 1; + * int rc; + * while ( (rc = cns()->globalBarrier_nonBlocking(channel, reset, 1000)) == BGCNS_RC_CONTINUE ) { + * reset = 0; + * } + * + * if ( rc == BGCNS_RC_COMPLETE ) { + * // good path + * } + * else { + * // error + * } + * @endcode + * + * Similarly, a barrier with a timeout can also be constructed: + * + * @code + * unsigned channel = 0; + * int reset = 1; + * unsigned long long startTime = ...; // obtain current time + * int rc; + * + * while ( (rc = cns()->globalBarrier_nonBlocking(channel,reset, 1000)) == BGCNS_RC_CONTINUE ) { + * reset = 0; + * unsigned long long currentTime = ...; // obtain current time + * if ( currentTime - startTime > timeout ) + * break; + * } + * + * if ( rc == BGCNS_RC_COMPLETE ) { + * // good path + * } + * else { + * // timeout or error + * } + * @endcode + * + * A node may opt out of a barrier channel via the disableBarrier service: + * + * @code + * + * // some other synchronization mechanism needs to go here + * + * cns()->disableBarrier(channel); + * + * @endcode + * + * Conversely, it may opt back in: + * + * @code + * cns()->enableBarrier(channel, user_mode); + * @endcode + * + * By default, CNS reserves the use of channel 2 as a global interrupt for environmental + * monitoring. 
It also reserves channel 3 for use as a supervisory mode, compute-node + * only barrier. Compute node kernels are free to share this channel for the same + * purpose (compute node, supervisory barrier). The enable/disable barrier services + * may return errors if operating on a reserved channel. + * + * NOTE: The standard BG/P software stack, which includes I/O node Linux and Compute Node + * Kernel (CNK) uses channel 0 as an I/O node barrier during boot and transforms it to an + * compute-node only barrier when jobs execute. + * + * + * @section CNS_3 DMA Services + * + * The DMA services provided in CNS are low-level services. Interested readers of this area should + * also look at the documentation for the DMA SPIs, which are at a slightly higher level. + * + * + * + * @section CNS_4 Reserved and Preferred Addresses + * + * + * The following virtual memory regions are reserved and must be avoided by + * kernels: + * + * @code + * + * +------------+------------+------+----------------------+-----------------------+ + * | Lower | Upper | Size | Usage | Attributes | + * +------------+------------+------+----------------------+-----------------------+ + * | CNSlow[1] | CNShigh[2] | 256K | CNS | I, Rs, Ws, Xs | + * +------------+------------+------+----------------------+-----------------------+ + * + * [1] CNSlow = descr->baseVirtualAddress , usually 0xFFF40000 + * [2] CNShigh = descr->baseVirtualAddress + descr->size - 1; usually 0xFFF7FFFF + * + * @endcode + * + * The following virtual memory regions are used by default in CNS. Kernels that wish to have + * a different memory map may do so via the mapDevice service. 
+ * + * @code + * +------------+------------+------+----------------------+-----------------------+ + * | Lower | Upper | Size | Usage | Attributes | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFB0000 | 0xFFFCFFFF | 64K | Torus | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFD0000 | 0xFFFD3FFF | 16K | DMA | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFD9000 | 0xFFFD9FFF | 4K | DevBus | I, G, Rs, Ws | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFDA000 | 0xFFFDAFFF | 4K | UPC | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFDC000 | 0xFFFDD3FF | 4K | Collective | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFDE000 | 0xFFFDEFFF | 4K | BIC | I, G, Rs, Ws, Xs | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFF0000 | 0xFFFF3FFF | 16K | Lockbox (supervisor) | I, G, Rs, Ws | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFF4000 | 0xFFFF7FFF | 16K | Lockbox (user) | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFF8000 | 0xFFFFFFFF | 32K | SRAM | SWOA, WL1, Rs, Ws, Xs | + * +------------+------------+------+----------------------+-----------------------+ + * @endcode + * + */ + + +#define BGCNS_VERSION 0x01030000 /* V1R3M0 efix 0 */ +#define BGCNS_IS_COMPATIBLE(descr) ( ((descr)->version & 0xFFFF0000) == (BGCNS_VERSION & 0xFFFF0000) ) //!< True iff the given descriptor is compatible with this version of CNS + +/* ! @enum BGCNS_InterruptType */ +/* ! @brief Defines the different types of interrupts known to */ +/* ! 
Common Node Services. */ +typedef enum { + BGCNS_NonCritical, //!< Non-critical interrupt + BGCNS_Critical, //!< Critical interrupt + BGCNS_MachineCheck, //!< Machine check +} BGCNS_InterruptType; + +/* ! @enum BGCNS_FifoOperation */ +/* ! @brief Defines the types of FIFO operations */ +/* ! @see _BGCNS_ServiceDirectory::setDmaFifoControls */ +/* ! @see _BGCNS_ServiceDirectory::setDmaLocalCopies */ +/* ! @see _BGCNS_ServiceDirectory::setDmaPriority */ +typedef enum { + BGCNS_Disable = 0, + BGCNS_Enable = 1, + BGCNS_Reenable = 2 +} BGCNS_FifoOperation; + +/* ! @enum BGCNS_FifoFacility */ +/* ! @brief Defines the various types of FIFO facilities */ +typedef enum { + BGCNS_InjectionFifo, //!< Normal Injection FIFO + BGCNS_ReceptionFifo, //!< Normal Reception FIFO + BGCNS_ReceptionHeaderFifo, //!< Reception Header FIFO (typically used only for debugging) + BGCNS_InjectionFifoInterrupt, + BGCNS_ReceptionFifoInterrupt, + BGCNS_ReceptionHeaderFifoInterrupt, + BGCNS_InjectionCounterInterrupt, + BGCNS_ReceptionCounterInterrupt +} BGCNS_FifoFacility; + +/* ! @enum BGCNS_LinkType */ +/* ! @brief Defines the types of MAC links. */ +/* ! @see _BGCNS_ServiceDirectory::macTestLink */ +typedef enum { + BGCNS_Transmitter, //!< A transmitter link. + BGCNS_Receiver //!< A receiver link. +} BGCNS_LinkType; + +/* ! @enum BGCNS_EnvmonParameter */ +/* ! @brief Enumerates the various environmental monitor parameters. */ +/* ! @see _BGCNS_ServiceDirectory::getEnvmonParm */ +/* ! @see _BGCNS_ServiceDirectory::setEnvmonParm */ +typedef enum { + BGCNS_envmon_period = 0, + BGCNS_envmon_policy, + BGCNS_envmon_globintwire, + + /* temporary */ + BGCNS_envmon_duration, + BGCNS_envmon_ddrratio, + BGCNS_envmon_numparms +} BGCNS_EnvmonParameter; + + +#define BGCNS_RC_COMPLETE 0 //!< Indicates that the operation completed normally. +#define BGCNS_RC_CONTINUE 1 //!< Indicates that the operation is still in progress. +#define BGCNS_RC_TIMEOUT -1 //!< Indicates that the operation timed out. 
+#define BGCNS_RC_ERROR -2 //!< Indicates that the operation failed. + +#define BGCNS_NUM_DMA_RECEPTION_GROUPS 4 +#define BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP 8 + +/* ! @brief Describes the mapping of physical torus reception FIFOs to DMA reception FIFOs (rmFIFOs). */ +/* ! The first dimension indexes DMA reception groups, which are a combination of PID0 and PID1 bits */ +/* ! from the DMA packet. */ +/* ! */ +/* ! The second dimension indexes through the different dimensions: X+, X-, Y+, Y-, Z+, Z-, high priority */ +/* ! and local copy. */ +typedef unsigned char BGCNS_ReceptionMap[BGCNS_NUM_DMA_RECEPTION_GROUPS][BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP]; + +/* ! @brief Indicates that an interrupt is to be broadcast on all cores. */ +/* ! @see _BGCNS_ServiceDirectory::enableInterrupt */ +#define BGCNS_ALL_CORE_BROADCAST 0xFFFFFFFFu + + +/* ! @enum BGCNS_DeviceMasks */ +/* ! @brief Provides a list of masks for various Blue Gene devices */ + +typedef enum { + BGCNS_SRAM = 0x80000000u, + BGCNS_BIC = 0x40000000u, + BGCNS_Torus = 0x20000000u, + BGCNS_DevBus = 0x10000000u, + BGCNS_XEMAC = 0x08000000u, + BGCNS_LockBox = 0x04000000u, + BGCNS_Collective = 0x02000000u, + BGCNS_SRAM_Err = 0x01000000u, + BGCNS_DMA = 0x00800000u, + BGCNS_UPC = 0x00400000u +} BGCNS_DeviceMasks; + +/* ! @typedef BGCNS_ServiceDirectory */ +/* ! @struct _BGCNS_ServiceDirectory */ +/* ! @brief The service directory is a collection of function pointers to services */ +/* ! provided by the Common Node Services. */ +typedef struct _BGCNS_ServiceDirectory { + + /*------------------------------------------*/ + /*--- Informational services for the node --*/ + /*------------------------------------------*/ + + + int (*isIONode)(void); //!< Returns 1 if this is an I/O node; 0 if not. 
+ + + /*-----------------------------------------------------------------*/ + /*--- Informational services for obtaining Raw personality data ---*/ + /*-----------------------------------------------------------------*/ + + unsigned int (*getPersonalitySize)(void); //!< Returns the size (in bytes) of the Blue Gene personality. + void* (*getPersonalityData)(void); //!< Returns a pointer to the raw personality data. + + + /*-----------------------------------------------*/ + /*--- Services for Symmetric Multi-Processing ---*/ + /*-----------------------------------------------*/ + + + unsigned (*getNumberOfCores)(void); //!< Returns the number of CPUs on this node. + + /* ! @brief Called by the kernel to activate a CPU. */ + /* ! @param[in] cpu The index of the cpu (core) to be activated. */ + /* ! @param[in] entry The (kernel) entry point function. This function will be invoked when */ + /* ! the CPU is actually activated. */ + /* ! @param[in] arg A pointer to the lone argument to be passed to the entry point. */ + /* ! @return Zero (0) if the CPU was succsessfully activated. Non-zero if the CPU was not */ + /* ! activated (e.g. invalid cpu argument, or the cpu has already been */ + /* ! activated). */ + /* ! @remarks See Section x of the Common Node Services overview for details. */ + int (*takeCPU)(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg)); + + + /*--------------------------------------*/ + /*--- Services for Blue Gene devices ---*/ + /*--------------------------------------*/ + + /* ! @brief Checks active devices for a clean termination state and returns 0 */ + /* ! if everything is nominal. Returns non-zero if any anomaly is */ + /* ! detected and logs violations. */ + /* ! @param[in] job_rc specifies the return code of the job that is terminating. */ + int (*terminationCheck)(int job_rc); + + /*-------------------------------*/ + /*--- Services for interrupts ---*/ + /*-------------------------------*/ + + + /* ! 
@brief Enables the specified interrupt. For all interrupts except inter-processor */ + /* ! interrupts, the interrupt will bendled by the specified core. */ + /* ! @param[in] group Specifies the Blue Gene interrupt group */ + /* ! @param[in] irq Specifies the interrupt index within the group */ + /* ! @param[in] itype Specifies the type of interrupt that hardware will present */ + /* ! for this group/irq. */ + /* ! @param[in] core Specifies which core will handle the interrupt. If specified as */ + /* ! BGCNS_ALL_CORE_BROADCAST, then all cores will handle the interrupt. */ + /* ! @return Returns zero (0) if the interrupt is enabled and returns non-zero if it was not */ + /* ! (including the case of bad arguments). */ + int (*enableInterrupt)(unsigned group, unsigned irq, BGCNS_InterruptType itype, unsigned core); + + /* ! @brief Disables the specified interrupt. */ + /* ! @param[in] group Specifies the Blue Gene interrupt group */ + /* ! @param[in] irq Specifies the interrupt index within the group */ + /* ! @return Returns zero (0) if the interrupt is disabled and returns non-zero if it was not */ + /* ! (including the case of bad arguments). */ + int (*disableInterrupt)(unsigned group, unsigned irq); + + /* ! @brief Queries the Blue Gene interrupt hardware for interrupts of the given */ + /* ! type and returns the group/IRQ. This service is typically used in the */ + /* ! context of an interrupt handler. Since multiple interrupt conditions */ + /* ! may be present, the service is typically invoked from the handler */ + /* ! (along with corresponding acknowledgement) until the return code */ + /* ! indicates that no more interrupts are present. */ + /* ! @param[out] group Specifies the Blue Gene interrupt group. The value is valid */ + /* ! only when the return code is 0. */ + /* ! @param[out] irq Specifies the interrupt index within the group. The value is */ + /* ! valid only when the reutrn code is zero. */ + /* ! 
@param[in] itype Specifies the type of interrupt being queried. */ + /* ! @return Returns zero (0) if an interrupt condition of the specified type exists. Returns -1 */ + /* ! if no such condition exists. */ + int (*getInterrupt)(BGCNS_InterruptType itype, unsigned* group, unsigned* irq); + + /* ! @brief Acknowledges the specified interrupt, thus clearing the interrupt */ + /* ! condition in the interrupt controller hardware. */ + /* ! @param[in] group Specifies the Blue Gene interrupt group */ + /* ! @param[in] irq Specifies the interrupt index within the group */ + /* ! @return Returns zero (0) if the interrupt is acknowledged and returns non-zero if it was not */ + /* ! (including the case of bad arguments). */ + /* ! @remarks Note that for some interrupts, it is not sufficient to only acknowledge */ + /* ! the interrupt; the hardware condition that triggered the interrupt may */ + /* ! also need to be cleared. */ + int (*acknowledgeInterrupt)(unsigned group, unsigned irq); + + /* ! @brief Raises the specified interrupt. */ + /* ! @param[in] group Specifies the Blue Gene interrupt group */ + /* ! @param[in] irq Specifies the interrupt index within the group */ + int (*raiseInterrupt)(unsigned group, unsigned irq); + + + /*------------------------*/ + /*--- Mailbox services ---*/ + /*------------------------*/ + + unsigned (*getMailboxMaximumConsoleInputSize)(void); //!< Returns the actual maximum console message input data size. + unsigned (*getMailboxMaximumConsoleOutputSize)(void); //!< Returns the actual maximum console message output data size. + + /* ! @brief Writes a text message to the output mailbox. */ + /* ! @param[in] msg a pointer to the message to be written. */ + /* ! @param[in] msglen the length (in bytes) of the message to be written. */ + /* ! @remarks As with all common services, the message data area must be mapped via */ + /* ! the TLB when the service is called. The behavior is not defined if this */ + /* ! is not the case. */ + /* ! 
@return Zero (0) if the message was written successfully, non-zero if anything went */ + /* wrong (including a message that is too large). */ + int (*writeToMailboxConsole)(char *msg, unsigned msglen); + + /* ! @brief Writes a text message to the output mailbox but does not wait for a */ + /* ! response back from the control system. When this service is used, */ + /* ! the caller must poll for completion using the testForOutboxCompletion */ + /* ! service. */ + /* ! @param[in] msg a pointer to the message to be written. */ + /* ! @param[in] msglen the length (in bytes) of the message to be written. */ + /* ! @remarks As with all common services, the message data area must be mapped via */ + /* ! the TLB when the service is called. The behavior is not defined if this */ + /* ! is not the case. */ + /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */ + /* wrong (including a message that is too large). */ + int (*writeToMailboxConsole_nonBlocking)(char* msg, unsigned msglen); + + /* ! @brief Tests the outbox to see if the last message was picked up by the control */ + /* ! system. */ + /* ! @return Zero (0) if the last message was picked up and returns non-zero if it has not. */ + /* ! @remarks Typically the caller will invoke this service after having called */ + /* ! writeToMailboxConsole_nonBlocking and will then invoke this service in a */ + /* ! loop until zero is returned. */ + int (*testForOutboxCompletion)(void); + + /* ! @brief Reads a message from the input mail box. */ + /* ! @param msg a pointer to a data area into which the message will be placed. */ + /* ! @param maxMsgSize gives the size of the data area, i.e. the largest message */ + /* ! that may be safely received into the buffer. */ + /* ! @return The actual length of the message (0 if no message was received). */ + /* ! @remarks As with all common services, the message data area must be mapped */ + /* ! via the TLB when this service is called. 
The results are not defined if */ + /* ! this is not the case. */ + unsigned (*readFromMailboxConsole)(char *buf, unsigned bufsize); + + int (*testInboxAttention)(void); //!< Returns 1 if something is available in the input mailbox. + + int (*_no_longer_in_use_1_)(void); //!< Obsolete ... do not use. + + int (*writeToMailbox)(void* message, unsigned length, unsigned cmd); + + /*------------------------------------*/ + /*--- RAS and diagnostic services ---*/ + /*------------------------------------*/ + + /* ! @brief TBD */ + void (*machineCheck)(void *regs); + + /* ! @brief Writes a RAS event to the log. */ + /* ! @param[in] facility The facility (aka component). */ + /* ! @param[in] unit The unit (aka subcomponent). */ + /* ! @param[in] err_code The error code. */ + /* ! @param[in] numDetails The number of additional details. */ + /* ! @param[in] details The list of additional details. */ + /* ! @return Zero if the message was written, non-zero if some error condition occurred. */ + /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */ + int (*writeRASEvent)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] ); + + /* ! @brief Writes a RAS string to the log. */ + /* ! @param[in] facility The facility (aka component). */ + /* ! @param[in] unit The unit (aka subcomponent). */ + /* ! @param[in] err_code The error code. */ + /* ! @param[in] str The message string being written (ASCII encoded, null-terminated). Note that the length of this string is */ + /* ! limited to _BGP_RAS_ASCII_MAX_LEN characters. The implementation may choose to truncate the string if it exceeds this */ + /* ! length. */ + /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */ + /* ! string was truncated). */ + /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. 
*/ + int (*writeRASString)( unsigned facility, unsigned unit, unsigned short err_code, char* str ); + + + /*---------------------------------*/ + /*--- Global Interrupt services ---*/ + /*---------------------------------*/ + + /* ! @brief A global (compute node) barrier. This call will block until all other compute nodes */ + /* ! in the partition also arrive at the barrier. */ + int (*globalBarrier)(void); + + /* ! @brief A global (compute node) barrier. This call will block until all other compute nodes */ + /* ! in the partition also arrive at the barrier or until the timeout is reached. */ + /* ! @param timeoutInMillis specifies the timeout duration. Units are milliseconds. */ + /* ! @return BGCNS_RC_COMPLETE if the barrier completed. BGCNS_RC_TIMEOUT if the barrier timed */ + /* ! out. BGCNS_RC_ERROR if some other error occurred. */ + int (*globalBarrierWithTimeout)(unsigned timeoutInMillis); + + + + /*-------------------------*/ + /*--- Network services ---*/ + /*-------------------------*/ + + + void (*initializeNetworks)(void); //!< @todo Is this is going away??? Talk to Andy + + void (*_no_longer_in_use_381)(void); //!< @warning Do not use + + void (*_no_longer_in_use_384)(void);//!< @warning Do not use + + + /*--------------------------*/ + /*--- DMA unit services ---*/ + /*--------------------------*/ + +#define BGCNS_DMA_CAPTURE_X_PLUS 0 //!< watch the X+ receiver +#define BGCNS_DMA_CAPTURE_X_MINUS 1 //!< watch the X- receiver +#define BGCNS_DMA_CAPTURE_Y_PLUS 2 //!< watch the Y+ receiver +#define BGCNS_DMA_CAPTURE_Y_MINUS 3 //!< watch the Y- receiver +#define BGCNS_DMA_CAPTURE_Z_PLUS 4 //!< watch the Z+ receiver +#define BGCNS_DMA_CAPTURE_Z_MINUS 5 //!< watch the Z- receiver +#define BGCNS_DMA_CAPTURE_DISABLE 7 //!< disable link capturing + + /* ! @brief Sets the link capture facility of the DMA unit to watch the specified */ + /* ! receiver (or disable). */ + /* ! @param[in] link Specifies the link being monitored. 
Use the BGCNS_DMA_CAPTURE_* */ + /* ! mnemonics defined above. */ + /* ! @return Zero if the operation succeeded, non-zero if it did not (e.g. an invalid */ + /* ! link was specified). */ + int (*setDmaLinkCapture)(int link); + + /* ! @brief Clears the link capture unit so that another packet can be captured. */ + void (*clearDmaLinkCapture)(void); + +#define BGCNS_RC_DMA_NO_PACKET_CAPTURED 0 +#define BGCNS_RC_DMA_CAPTURE_UNIT_ERROR -1 +#define BGCNS_RC_DMA_DATA_CONFLICT -2 //!< if initial read indicates a bad packet is captured but subsequent read shows bad packet not captured +#define BGCNS_RC_DMA_DATA_CONFLICT2 -3 //!< if bad packet is captured, but all the bytes are the same + /* ! @brief Reads the DMA link capture packets. */ + int (*readDmaLinkCapturePackets)(unsigned char* good_packet, int* good_packet_size, unsigned char* bad_packet, int* bad_packet_size); + + +#define BGCNS_DMA_ALL_GROUPS 0xFFFFFFFF + + /* ! @brief Sets FIFO controls for the DMA unit. */ + /* ! */ + /* ! An operation on facility BGCNS_InjectionFifo enables or disables a subset of the 128 DMA injection FIFOs. */ + /* ! The FIFOs are organized into four groups of 32. The mask argument is a bit mask (bit i controls the i-th imFIFO */ + /* ! within that group, that is the (group*32)+i imFIFO. */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionFifo enables or disables a subset of the 32 DMA reception FIFOs. */ + /* ! The group argument is ignored and the mask argument is a bit mask (bit i controls the i-th reception FIFO). */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionHeaderFifo enables or disables the header FIFO for the specified */ + /* ! group. The mask argument is ignored. Note that the header FIFO is typically used for debugging. */ + /* ! */ + /* ! An operation on facility BGCNS_InjectionFifoInterrupt enables or disables threshold interrupts for the */ + /* ! specified injection FIFO. 
Threshold interrupts occur if available space is less than the configured */ + /* ! threshold when the FIFO is used for a remote get operation. The group and mask arguments are as */ + /* ! described in the BGCNS_InjectionFifo operation (above). */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionFifoInterrupt enables or disables interrupts for the specified */ + /* ! reception FIFO(s). If enabled, an interrupt will occur when the reception FIFO's available space drops */ + /* ! below the configured threshold. The group argument selects the interrupt type (type 0, 1, 2 or 3). */ + /* ! The mask argument is a bit mask selecting one or more of the 32 normal reception FIFOs. */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionHeaderFifoInterrupt enables or disables interrupts for the specified */ + /* ! reception header FIFO. Reception header FIFOs are used for debug purposes only. */ + /* ! */ + /* ! An operation on facility BGCNS_InjectionCounterInterrupt enables or disables "Counter Hit Zero" interrupts. */ + /* ! The group argument does not specify counter group, but rather specifies interrupt 0, 1, 2 or 3. The mask */ + /* ! argument is a bit mask that selects one or more counter subgroups to operate on (the 256 injection counters */ + /* ! are partitioned into 32 subgroups of 8 counters). */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionCounterInterrupt enables or disables "Counter Hit Zero" interrupts */ + /* ! for reception counters. The group and mask arguments are the same as described in the */ + /* ! BGCNS_InjectionCounterInterrupt operation (above). */ + /* ! */ + /* ! The buffer argument is used as a means to save/restore in an opaque manner. This is achieved by passing */ + /* ! a non-NULL buffer to a disable operation and subsequently passing that buffer during a reenable */ + /* ! operation (the buffer is used to snapshot state). */ + /* ! */ + /* ! */ + /* ! @code */ + /* ! 
+---------------------------------+-----------+---------+-------+ */ + /* ! | Facility | group | mask | Notes | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_InjectionFifo | 0..3 | 32 bits | [1] | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_ReceptionFifo | n/a | 32 bits | [2] | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_ReceptionHeaderFifo | 0..3, ALL | N/A | | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_InjectionFifoInterrupt | 0..3 | 32 bits | [1] | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_ReceptionFifoInterrupt | 0..3 | 32 bits | [3] | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_InjectionCounterInterrupt | 0..3 | 32 bits | [3][4]| */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_ReceptionCounterInterrupt | 0..3 | 32 bits | [3][4]| */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! */ + /* ! [1] There are 128 injection FIFOs partitioned into 4 groups of 32. */ + /* ! [2] There are 32 normal reception FIFOs in BG/P. */ + /* ! [3] There are 4 interrupt lines. The group argument selects one these 4. */ + /* ! [4] There are 256 counters of each type (injection and reception). The */ + /* ! 32-bit mask partitions them into groups of 8. */ + /* ! */ + /* ! @endcode */ + /* ! */ + /* ! @param[in] operation defines the type of operation being performed (enable, disable, or re-enable). */ + /* ! @param[in] facility defines the type of FIFO being configured. */ + /* ! @param[in] group is interpreted differently based on the facility. */ + /* ! @param[in] mask is interpreted differently based on the facility. */ + /* ! 
@param[out] buffer is interpreted differently based on the operation and facility. It is generally used to capture */ + /* ! a copy of the facility's current state in an enable operation (and may be null, in which case it is ignored). It is */ + /* ! generally used as the value to be loaded in a re-enable operation. In this manner, a state value captured by an enable */ + /* ! operation may be easily restored by a subsequent re-enable operation. The buffer argument is generally ignored by */ + /* ! disable operations. */ + int (*setDmaFifoControls)(BGCNS_FifoOperation operation, BGCNS_FifoFacility facility, unsigned group, unsigned mask, unsigned* buffer); + + /* ! @brief Maps injection FIFOs onto physical (torus hardware) FIFOs. */ + /* ! @param[in] group specifies the injection FIFO group. */ + /* ! @param[in] fifoIds is an array of length numberOfFifos whose elements are the identifiers of the imFIFO (within that */ + /* ! given group). */ + /* ! @param[in] injection_map is an array of length numberOfFifos whose elements are 8-bit masks identifying which of the */ + /* ! physical torus injection FIFOs are mapped. Bits 0-3 correspond to torus group 0, and bits 4-7 correspond to torus */ + /* ! group 1. Bits 3 and 7 are the high priority FIFOs. */ + /* ! @param[in] numberOfFifos describes the number of elements contained in the fifoIds and injection_map arguments. */ + /* ! @return Zero if the map was properly set. Non-zero if it was not, including the case of illegal arguments. */ + /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */ + /* ! argument is 0..3 and the legal range for the fifoIds[] elements is 0..31. */ + + int (*setDmaInjectionMap)(unsigned group, unsigned fifoIds[], unsigned char injection_map[], unsigned numberOfFifos); + + /* ! @brief Enables or disables "local copy" behavior for the specified injection FIFOs. A local copy injection FIFO */ + /* ! 
can be used to perform memory copies within a node via the DMA engine. */ + /* ! @param[in] operation specifies whether local copies is being enabled or disabled on the specified FIFOs. The BGCNS_Reenable */ + /* ! operation is not supported. */ + /* ! @param[in] group specifies the injection FIFO group. */ + /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */ + /* ! @return Zero if the operation succeeded; non-zero if it did not. */ + /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */ + /* ! argument is 0..3. */ + int (*setDmaLocalCopies)(BGCNS_FifoOperation operation, unsigned group, unsigned bits); + + /* ! @brief Enables or disables the priority bit for the specified injection FIFOs. The priority bit */ + /* ! is used by the hardware arbitration (details are not further documented here). */ + /* ! @param[in] operation specifies whether priority bits are being set or cleared. */ + /* ! @param[in] group specifies the injection FIFO group. */ + /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */ + /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */ + /* ! argument is 0..3. */ + int (*setDmaPriority)(BGCNS_FifoOperation operation, unsigned group, unsigned bits); + + /* ! @brief Sets the mapping from physical (torus hardware) reception FIFOs to reception FIFOs. The hardware supports */ + /* ! 8 torus FIFOs (six torus dimensions plus high priority plus local copy). Furthermore, the hardware supports */ + /* ! 4 groups as derived from the PID0 and PID1 bits of the DMA packet. Thus the mapping is a 4 x 8 matrix of */ + /* ! reception FIFO ids. */ + /* ! @param[in] torus_reception_map maps {group} X {torus-hardware-FIFOs} --> reception FIFOs. */ + /* ! 
@param[in] fifo_types is an array of N values specifying the type of each normal reception FIFO (see also threshold). For BGP, */ + /* ! N=2 (there are 32 normal reception FIFOs). */ + /* ! @param[in] header_types is an array of N values specifying the type of each reception header FIFO (see also threshold). For */ + /* ! BGP, N=4 (there are 4 reception header FIFOs). Note that reception header FIFOs are typically only used for debugging purposes. */ + /* ! @param[in] threshold is an array of N threshold values. The value threshold[i] specifies the threshold value for reception */ + /* ! FIFO type i. If reception FIFO interrupts are enabled (see setDmaFifoControls) and a reception FIFO's available space drops */ + /* ! below its threshold, an interrupt is driven. For BGP, N=2 (there are type 0 and type 1 injection FIFOs). */ + int (*setDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned header_types[], unsigned threshold[]); + + /* ! @brief Gets the reception map. */ + /* ! @see setDmaReceptionMap for descriptions of the map and arguments. */ + int (*getDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned short* store_headers, unsigned header_types[], unsigned threshold[]); + + + /* ! @deprecated */ + int (*_used_to_be_clearDmaFullReceptionFifo__removed)(void); + + + /* ! @brief Resets the MAC unit's PHY. */ + /* ! @return Zero if the unit was properly reset. Returns non-zero if some error occurred. */ + /* ! @deprecated See macResetPHY_nonBlocking. */ + int (*macResetPHY)(void); + + /* ! @brief Tests the MAC unit's link. */ + /* ! @param[in] link_type specifies the type of link to be tested. */ + /* ! @return One (1) if the link is active; zero (0) if it is not. */ + /* ! @deprecated See macTestLink_nonBlocking */ + int (*macTestLink)(BGCNS_LinkType link_type); + + /* ! @brief Reads one of the MAC's XGMII registers. */ + /* ! @param[in] device_address */ + /* ! 
@param[in] port_address */ + /* ! @param[in] register_address */ + /* ! @return The register's value or a negative number if some error occurred. */ + /* ! @deprecated Low level MAC register access is being eliminated. */ + int (*macXgmiiRead)(unsigned device_address, unsigned port_address, unsigned register_address); + + /* ! @brief Writes one of the MAC's XGMII registers. */ + /* ! @param[in] device_address */ + /* ! @param[in] port_address */ + /* ! @param[in] register_address */ + /* ! @param[in] value */ + /* ! @return Zero (0) if the register was successfully written; non-zero if some error occurred. */ + /* ! @deprecated Low level MAC register access is being eliminated. */ + int (*macXgmiiWrite)(unsigned device_address, unsigned port_address, unsigned register_address, unsigned value); + + + /* ! @brief Trains SerDes in a non-blocking manner. The standard usage is to initiate */ + /* ! training with trainSerDes(1), check the return code, and then continue to invoke */ + /* ! trainSerDes(0) as long as the return code is BGCNS_RC_CONTINUE. */ + /* ! @param[in] reset Should be 1 when initiating a retraining sequence and 0 for any */ + /* ! continuations. */ + /* ! @return BGCNS_RC_CONTINUE if training is still ongoing (the caller should re-invoke */ + /* ! the service again (with reset=0). BGCNS_RC_COMPLETE if training is complete. */ + /* ! BGCNS_ERROR if some error has occurred. */ + int (*trainSerDes)(int reset); + + /* ! @brief Fetches the value of the specified control parameter of the environmental monitor. */ + /* ! @param[in] parameter Parameter to retrieve. Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */ + /* ! @param[in] value Pointer to the storage location that will contain the parameter's value when the function successfully returns. */ + /* ! @return Zero if the register was successfully fetched; non-zero if some error occurred. */ + int (*getEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int* value); + + /* ! 
@brief Stores a value to the specified control parameter of the environmental monitor */ + /* ! @param[in] parameter Parameter to store. Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */ + /* ! @param[in] value New value for the parameter */ + /* ! @return Zero if the register was successfully fetched; non-zero if some error occurred. */ + int (*setEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int value); + + /* ! @brief Performs checks and ensures that the node will continue to operate within tolerances. */ + /* ! @note MUST be called regularly as indicated by nextCallbackTime parameter */ + /* ! @param[in] nextCallbackTime Upon returning, this will contain the PPC Timebase register value indicating when the next */ + /* ! time the operating system needs to call performEnvMgmt. Failure to do so may result in poorly performing */ + /* ! nodes or shutdown of the block / rack. */ + int (*performEnvMgmt)(unsigned long long* nextCallbackTime); + + + /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */ + /* ! response back from the control system. When this service is used, */ + /* ! the caller must poll for completion using the testForOutboxCompletion */ + /* ! service. */ + /* ! @param[in] facility The facility (aka component). See bgp_ras.h for a list of facilities. */ + /* ! @param[in] unit The unit (aka subcomponent). See bgp_ras.h for a list of units. */ + /* ! @param[in] err_code The error code. See bgp_ras.h for a list of error code.s */ + /* ! @param[in] numDetails The number of additional details. */ + /* ! @param[in] details The list of additional details. */ + /* ! @return Zero if the message was written, non-zero if some error condition occurred. */ + int (*writeRASEvent_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] ); + + /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */ + /* ! 
response back from the control system. When this service is used, */ + /* ! the caller must poll for completion using the testForOutboxCompletion */ + /* ! service. */ + /* ! @param[in] facility The facility (aka component). See bgp_ras.h for a list of facilities. */ + /* ! @param[in] unit The unit (aka subcomponent). See bgp_ras.h for a list of units. */ + /* ! @param[in] err_code The error code. See bgp_ras.h for a list of error code.s */ + /* ! @param[in] str The message string being written (ASCII encoded, null-terminated). Note that the length of this string is */ + /* ! limited to _BGP_RAS_ASCII_MAX_LEN characters. The implementation may choose to truncate the string if it exceeds this */ + /* ! length. */ + /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */ + /* ! string was truncated). */ + /* ! @return Zero if the message was written, non-zero if some error condition occurred. */ + int (*writeRASString_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, char* str ); + + /* ! @brief Sets the core's timebase registers to the specified value. */ + /* ! @param[in] newtime The new 64-bit timebase */ + /* ! @return Zero if the timebase was successfully set, non-zero if some error condition occurred. */ + /* ! @deprecated */ + int (*synchronizeTimebase)(unsigned long long newtime); + + /* ! @brief Sets the node's DMA physical protection settings. */ + /* ! @note on BGP, there are a maximum of 8 read ranges and 8 write ranges */ + /* ! @return Zero if the DMA ranges were set, non-zero if some error condition occurred. */ + int (*dmaSetRange)(unsigned numreadranges, unsigned long long* read_lower_paddr, unsigned long long* read_upper_paddr, + unsigned numwriteranges, unsigned long long* write_lower_paddr, unsigned long long* write_upper_paddr); + + /* ! @brief Checks the status of the devices and reports correctible RAS (if any) */ + /* ! 
@param[in] clear_error_counts If non-zero, function will also reset the hardware error counters after posting any RAS. */ + /* ! @return Zero if successful, non-zero if some error condition occurred. */ + int (*statusCheck)(unsigned clear_error_counts); + + /* ! @brief Stops the DMA and clears any reception unit failure */ + int (*stopDma)(void); + + /* ! @brief Starts the DMA */ + int (*startDma)(void); + + /* ! @brief Performs a hard exit. The status code is provided to the control system. */ + /* ! @return This service never returns. */ + void (*exit)(int rc); + + /* ! @brief Resets the MAC unit's PHY but does not block. */ + /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */ + /* ! reset sequence. That is, callers should initiate a reset sequence with reset=1 and then */ + /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */ + /* ! reset=0. */ + /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */ + /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */ + /* ! to indicate that it needs additional time. */ + /* ! @return BGCNS_RC_COMPLETE if the unit was properly reset. BGCNS_RC_CONTINUE if the reset operation is */ + /* ! not yet complete. BGCNS_RC_ERROR if the reset operation failed. */ + int (*macResetPHY_nonBlocking)(int reset, unsigned timeoutInMillis); + + /* ! @brief Tests the MAC unit's link but does not block. */ + /* ! @param[in] link_type specifies the type of link to be tested. */ + /* ! @param[out] result points to the link status, which is valid only when the return code is */ + /* ! BGCNS_RC_COMPLETE. A value of one (1) indicates that the link is active; zero (0) */ + /* ! indicates that it is inactive. */ + /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */ + /* ! test link sequence. 
That is, callers should initiate a sequence with reset=1 and then */ + /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */ + /* ! reset=0. */ + /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */ + /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */ + /* ! to indicate that it needs additional time. */ + /* ! @return BGCNS_RC_COMPLETE if the test is complete (result is valid only in this case). BGCNS_RC_CONTINUE */ + /* ! if the reset operation is not yet complete. BGCNS_RC_ERROR if the reset operation failed. */ + int (*macTestLink_nonBlocking)(BGCNS_LinkType link_type, unsigned* result, int reset, unsigned timeoutInMillis); + + void * _not_in_use_1068; + void * _not_in_use_1069; + + + /* ! @brief Indicates that a new job is about to start. */ + /* ! @return Zero (0) if CNS is ready for a new job to start. Returns non-zero otherwise. */ + int (*startNextJob)(void); + + /* ! @brief Indicates that the CNS should use the specified virtual address when accessing the */ + /* ! given device. When a device is remapped, CNS will no longer make any attempt to map */ + /* ! a TLB to access that device -- it is the responsibility of the kernel to handle the */ + /* ! TLB either proactively or reactively (via a fault). */ + /* ! @param[in] device specifies the device being mapped. */ + /* ! @param[in] base_address is the root virtual address of the device. The address should be */ + /* ! naturally aligned (relative to the size of the device). See the section Reserved and */ + /* ! Preferred Addresses for more information. */ + /* ! @return Zero (0) if the device was successfully remapped. Returns non-zero if it was not. */ + /* ! @remarks The lock box is in active use by CNS during early boot and thus it is not */ + /* ! possible to remap the BGCNS_LockBox device until all cores are activated by the kernel */ + /* ! 
(that is, takeCPU has been called for all cores). */ + int (*mapDevice)(BGCNS_DeviceMasks device, void* base_address); + + /* ! @brief Enables barriers on the specified channel. */ + /* ! @param channel specifies the channel being enabled. */ + /* ! @param user_mode indicates whether the barrier is to be used in user-mode code. */ + /* ! @return Zero if global barriers were enabled. Returns non-zero if the request could not be */ + /* ! completed, including the case of attempting to enable a reserved channel. */ + int (*enableBarrier)(unsigned int channel, int user_mode); + + /* ! @brief Disables barriers on the specified channel. */ + /* ! @return Zero if global barriers were disabled. Returns non-zero if the request could not be */ + /* ! completed, including the case of attempting to disable a reserved channel. */ + int (*disableBarrier)(unsigned int channel); + + /* ! @brief A global barrier that does not block indefinitely. */ + /* ! @param channel indicates the GLINT hardware channel to use. */ + /* ! @param reset indicates whether this is the beginning (1) or a continuation (0) of a barrier */ + /* ! sequence. That is, caller should initiate a barrier operation by passing reset=1 and then, */ + /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke the service again with */ + /* ! reset=0. */ + /* ! @param timeoutInMillis is the (approximate) number of milliseconds that this service is allowed */ + /* ! to wait for barrier participants before returning to the caller. */ + /* ! @return BGCNS_RC_COMPLETE indicates that all participants have arrived at the barrier. BGCNS_RC_CONTINUE */ + /* ! indicates that not all participants arrived within the allotted timeout period. BGCNS_RC_ERROR */ + /* ! indicates that another problem has been detected. */ + /* ! @remarks This service is not thread safe. It is considered a programming error to invoke it */ + /* ! from multiple threads concurrently and the behavior is not defined. 
*/ + int (*globalBarrier_nonBlocking)(unsigned channel, int reset, unsigned timeoutInMillis); + + /* ! @brief Restart kernel in cycle reproducibility mode. */ + /* ! @return Zero if no restart was required for reproducibility. */ + /* ! @remarks This service must be called from each core and only after all I/O operations have been completed. */ + /* ! Processors will be reset and kernels will start again. */ + int (*setupReproducibility)(void); + +} BGCNS_ServiceDirectory; + +/* ! @deprecated */ +/* ! @typedef BGCNS_DeprecatedServicesDirectory */ +/* ! @struct _BGCNS_DeprecatedServices */ +/* ! @brief These services exist for historical reasons and are not further documented here. */ +/* ! They may not be available in future releases of CNS. */ +typedef struct _BGCNS_DeprecatedServices { + int (*torusTermCheck)(int* nonFatalRc); + int (*torusLinkErrCheck)(int* nonFatalRc); + int (*torusCRCExchange)(void); + int (*collectiveConfigureClassInternal)(unsigned virtualTree, unsigned short specifier); + int (*collectiveConfigureClass)(unsigned virtualTree, unsigned short specifier); + unsigned (*collectiveGetClass)(unsigned virtualTree); + int (*collectiveInit)(void); + int (*collectiveRelease)(void); + int (*collectiveHardReset)(void); + int (*netbusTermCheck)(void); + unsigned (*getSerDesLinkStatus)(void); + int (*dmaTermCheck)(void); +} BGCNS_DeprecatedServicesDirectory; + +/* ! @typedef BGCNS_Descriptor */ +/* ! @struct _BGCNS_Descriptor */ +/* ! @brief The Common Node Services descriptor. This descriptor provides information to the kernel regarding */ +/* ! the CNS memory region as well as a service directory. The descriptor is passed to the kernel */ +/* ! upon boot and must not be altered by the kernel. */ +typedef struct _BGCNS_Descriptor { + BGCNS_ServiceDirectory* services; //!< A pointer to the services directory. + unsigned baseVirtualAddress; //!< The virtual address of the beginning of the CNS memory region. 
+ unsigned size; //!< The size (in bytes) of the CNS memory region. + unsigned basePhysicalAddress; //!< The physical address of the CNS memory region. + unsigned basePhysicalAddressERPN; //!< The extended real page number of the CNS memory region. + unsigned bgcns_private_in_use; //!< Undefined. This field is for internal use only and may disappear at any time. + BGCNS_DeprecatedServicesDirectory* deprecatedServices; //!< @deprecated undocumented + unsigned version; //!< The CNS version +} BGCNS_Descriptor; + + + +#endif /* !__ASSEMBLY */ +#endif /* _BGCNS_H */ diff --git a/arch/powerpc/boot/bgp.c b/arch/powerpc/boot/bgp.c new file mode 100644 index 00000000000000..9aefcb125732bb --- /dev/null +++ b/arch/powerpc/boot/bgp.c @@ -0,0 +1,166 @@ +/* + * (C) Copyright IBM Corp. 2007, 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Based on earlier code: + * Copyright (C) Paul Mackerras 1997. + * + * Matt Porter <mporter@kernel.crashing.org> + * Copyright 2002-2005 MontaVista Software Inc. 
 *
 *   Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
 *   Copyright (c) 2003, 2004 Zultys Technologies
 *
 *   David Gibson, IBM Corporation, 2007
 *
 */
#include "types.h"
#include "ops.h"
#include "stdio.h"
#include "4xx.h"
#include "44x.h"
#include "bgcns.h"
/* Types needed for the personality.
 * The boot wrapper has no <linux/types.h>, so map its u8/u16/u32 onto the
 * uintN_t names that bgp_personality.h expects. */
typedef u8 uint8_t;
typedef u16 uint16_t;
typedef u32 uint32_t;
#include "bgp_personality.h"

/* Blue Gene firmware jumps to 0x10.
 * Simply branch to _zimage_start which is typically 0x800000.
 * Must also link with --section-start bgstart=0
 */
asm (
"	.section bgstart, \"ax\"; "
"	.=0x10; "
"	lis %r9, _zimage_start@h; "
"	ori %r9, %r9, _zimage_start@l; "
"	mtlr %r9; "
"	blr; "
"	.previous "
);

/* This will point directly to CNS which remains mapped on entry. */
BGCNS_Descriptor* cns;

/* Marked unused: the function is only referenced when the wrapper is built
 * with CONFIG_BLUEGENE_NOISY_BOOT. */
static void bgp_console_write(const char *msg, int len) __attribute__((unused)) ;

/* Early boot console: forward wrapper output to the CNS mailbox console.
 * Output is silently dropped until the global 'cns' descriptor is set. */
static void bgp_console_write(const char *msg, int len)
{
	if (cns)
		cns->services->writeToMailboxConsole((char *)msg, len);
}

/* Publish the CNS region (addresses, size, service directory pointer and
 * version) into the /ibm,bluegene/cns device-tree node so the kernel can
 * find and call the firmware services.  Fatal if the node is missing.
 * (Note: the parameter deliberately shadows the file-scope 'cns'.) */
static void bgp_fixup_bluegene_cns(BGCNS_Descriptor *cns)
{
	void *node = finddevice("/ibm,bluegene/cns");
	if (node) {
		setprop_val(node, "base-va", cns->baseVirtualAddress);
		setprop_val(node, "base-pa", cns->basePhysicalAddress);
		setprop_val(node, "size", cns->size);
		setprop_val(node, "services", cns->services);
		setprop_val(node, "version", cns->version);
	} else {
		fatal("could not find /ibm,bluegene/cns node in device tree");
	}
}

/* Copy the firmware personality into /ibm,bluegene/personality: the raw
 * structure for external tools plus a decoded CPU frequency (Hz). */
static void bgp_fixup_bluegene_personality(BGP_Personality_t *bgpers)
{
	void *node = finddevice("/ibm,bluegene/personality");
	if (node) {
		/* We could include individual fields of the personality as needed
		 * so that Linux doesn't need to decode the struct directly.  We
		 * provide raw-data for external tools and daemons.
		 * This can replace /proc/personality
		 */
		unsigned frequency = bgpers->Kernel_Config.FreqMHz * 1000000;
		setprop(node, "raw-data", bgpers, sizeof(*bgpers));
		setprop_val(node, "frequency", frequency);
	} else {
		fatal("could not find /ibm,bluegene/personality node in device tree");
	}
}

/* Detect an optional ramdisk at its fixed load address and record its
 * bounds in /chosen for the kernel.  Absence of the magic word simply
 * means no initrd was loaded; only a missing /chosen node is fatal. */
static void bgp_fixup_bluegene_initrd(void)
{
	void *node = finddevice("/chosen");
	if (node) {
		/* On Blue Gene we may have a gzipped ramdisk loaded at a fixed
		 * address (0x1000000).  It is preceeded by a 4-byte magic value and a
		 * 4-byte big endian length.
		 */
		unsigned *rd = (unsigned *)0x1000000;	/* 16M */

		if (rd[0] == 0xf0e1d2c3 && rd[1] != 0) {
			unsigned initrd_start = (unsigned)(rd+2);
			unsigned initrd_len = rd[1];
			unsigned initrd_end = initrd_start + initrd_len;
			setprop_val(node, "linux,initrd-start", initrd_start);
			setprop_val(node, "linux,initrd-end", initrd_end);
		}
	} else {
		fatal("could not find chosen node in device tree");
	}
}

/* Top-level device-tree fixups run by the boot wrapper: size the memory
 * node (DDR minus the CNS-reserved region), set CPU clocks from the
 * personality, then fill in the Blue Gene specific nodes above. */
static void bgp_fixups(void)
{
	BGP_Personality_t *bgpers = cns->services->getPersonalityData();
	unsigned int DDRSize = (bgpers->DDR_Config.DDRSizeMB << 20) - cns->size;
	unsigned int freq = bgpers->Kernel_Config.FreqMHz * 1000000;

/* For vRNIC configurations, turn down the memory that Linux thinks is on the node so the vRNIC can map it all */
	if ( (DDRSize & 0xf0000000 ) == 0xd0000000 ) DDRSize = 0xb0000000 ;

	dt_fixup_memory(0, DDRSize);
	dt_fixup_cpu_clocks(freq, freq, freq);

	bgp_fixup_bluegene_cns(cns);
	bgp_fixup_bluegene_personality(bgpers);
	bgp_fixup_bluegene_initrd();

#if 0
	/* FIXME: sysclk should be derived by reading the FPGA registers */
	unsigned long sysclk = 33000000;

	ibm440gp_fixup_clocks(sysclk, 6 * 1843200);
	ibm4xx_sdram_fixup_memsize();
	dt_fixup_mac_address_by_alias("ethernet0", ebony_mac0);
	dt_fixup_mac_address_by_alias("ethernet1", ebony_mac1);
	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
	ebony_flashsel_fixup();
#endif
}


void
platform_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + cns = (BGCNS_Descriptor*) r3; +#if defined(CONFIG_BLUEGENE_NOISY_BOOT) + console_ops.write = bgp_console_write; +#endif + + simple_alloc_init(_end, 256 << 20, 32, 64); + + platform_ops.fixups = bgp_fixups; + platform_ops.exit = ibm44x_dbcr_reset; + fdt_init(_dtb_start); + +/* serial_console_init(); */ +} diff --git a/arch/powerpc/boot/bgp_personality.h b/arch/powerpc/boot/bgp_personality.h new file mode 100644 index 00000000000000..f4d9309640a4bf --- /dev/null +++ b/arch/powerpc/boot/bgp_personality.h @@ -0,0 +1,1086 @@ +/* + * Andrew Tauferner + * + * Copyright 2006, 2007 International Business Machines + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef BGP_PERSONALITY_H_ // Prevent multiple inclusion +#define BGP_PERSONALITY_H_ + + + + +/* #include <linux/types.h> */ + +// These defines allows use of IBM's bit numberings (MSb=0, LSb=31)for multi-bit fields +// b = IBM bit number of the least significant bit (highest number) +// x = value to set in field +// s = size +#define _BS(b,x,s)( ( ( x) & ( 0x7FFFFFFF>> ( 31- ( s)))) << ( 31- ( b))) +#define _BG(b,x,s)( ( _BS(b,0x7FFFFFFF,s) & x ) >> (31-b) ) +#define _BS64(b,x,s)( ( ( x) & ( 0x7FFFFFFFFFFFFFFFLL>> ( 63- ( s)))) << ( 63- ( b))) +#define _BG64(b,x,s)( ( _BS64(b, 0x7FFFFFFFFFFFFFFFLL,s) & x ) >> (63-b) ) +#define _BN(b) ((1<<(31-(b)))) +#define _B1(b,x) (((x)&0x1)<<(31-(b))) +#define _B2(b,x) (((x)&0x3)<<(31-(b))) +#define _B3(b,x) (((x)&0x7)<<(31-(b))) +#define _B4(b,x) (((x)&0xF)<<(31-(b))) +#define _B5(b,x) (((x)&0x1F)<<(31-(b))) +#define _B6(b,x) (((x)&0x3F)<<(31-(b))) +#define _B7(b,x) (((x)&0x7F)<<(31-(b))) +#define _B8(b,x) (((x)&0xFF)<<(31-(b))) +#define _B9(b,x) 
(((x)&0x1FF)<<(31-(b))) +#define _B10(b,x) (((x)&0x3FF)<<(31-(b))) +#define _B11(b,x) (((x)&0x7FF)<<(31-(b))) +#define _B12(b,x) (((x)&0xFFF)<<(31-(b))) +#define _B13(b,x) (((x)&0x1FFF)<<(31-(b))) +#define _B14(b,x) (((x)&0x3FFF)<<(31-(b))) +#define _B15(b,x) (((x)&0x7FFF)<<(31-(b))) +#define _B16(b,x) (((x)&0xFFFF)<<(31-(b))) +#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b))) +#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b))) +#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b))) +#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b))) +#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b))) +#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b))) +#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b))) +#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b))) +#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b))) +#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b))) +#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b))) +#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b))) +#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b))) +#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b))) +#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b))) + +#define BGP_UCI_Component_Rack ( 0) +#define BGP_UCI_Component_Midplane ( 1) +#define BGP_UCI_Component_BulkPowerSupply ( 2) +#define BGP_UCI_Component_PowerCable ( 3) +#define BGP_UCI_Component_PowerModule ( 4) +#define BGP_UCI_Component_ClockCard ( 5) +#define BGP_UCI_Component_FanAssembly ( 6) +#define BGP_UCI_Component_Fan ( 7) +#define BGP_UCI_Component_ServiceCard ( 8) +#define BGP_UCI_Component_LinkCard ( 9) +#define BGP_UCI_Component_LinkChip (10) +#define BGP_UCI_Component_LinkPort (11) // Identifies 1 end of a LinkCable +#define BGP_UCI_Component_NodeCard (12) +#define BGP_UCI_Component_ComputeCard (13) +#define BGP_UCI_Component_IOCard (14) +#define BGP_UCI_Component_DDRChip (15) +#define BGP_UCI_Component_ENetConnector (16) + +typedef struct BGP_UCI_Rack_t + { // "Rxy": R<RackRow><RackColumn> + unsigned Component : 5; // when BGP_UCI_Component_Rack + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned _zero : 19; // 
  }
  BGP_UCI_Rack_t;

// NOTE(review): every BGP_UCI_* struct below is a decoded view of the same
// 32-bit UCI word (see the union at the end of this group).  The layouts
// rely on MSB-first bit-field allocation as used on big-endian PowerPC --
// confirm before building for any other ABI.

#define BGP_UCI_RACK_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_Rack
#define BGP_UCI_RACK_RACKROW(x)		_B4( 8,x)	// 0..F
#define BGP_UCI_RACK_RACKCOLUMN(x)	_B4(12,x)	// 0..F



typedef struct BGP_UCI_Midplane_t
  {				// "Rxy-Mm": R<RackRow><RackColumn>-M<Midplane>
    unsigned Component	 : 5;	// when BGP_UCI_Component_Midplane
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned _zero	 : 18;	// zero's
  }
  BGP_UCI_Midplane_t;

#define BGP_UCI_MIDPLANE_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_Midplane
#define BGP_UCI_MIDPLANE_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_MIDPLANE_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_MIDPLANE_MIDPLANE(x)	_B1(13,x)	// 0=Bottom, 1=Top


typedef struct BGP_UCI_BulkPowerSupply_t
  {				// "Rxy-B": R<RackRow><RackColumn>-B
    unsigned Component	 : 5;	// when BGP_UCI_Component_BulkPowerSupply
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned _zero	 : 19;	// zero's
  }
  BGP_UCI_BulkPowerSupply_t;

#define BGP_UCI_BULKPOWERSUPPLY_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_BulkPowerSupply
#define BGP_UCI_BULKPOWERSUPPLY_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_BULKPOWERSUPPLY_RACKCOLUMN(x)	_B4(12,x)	// 0..F



typedef struct BGP_UCI_PowerCable_t
  {				// "Rxy-B-C": R<RackRow><RackColumn>-B-C
    unsigned Component	 : 5;	// when BGP_UCI_Component_PowerCable
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned _zero	 : 19;	// zero's
  }
  BGP_UCI_PowerCable_t;

#define BGP_UCI_POWERCABLE_COMPONENT(x)		_B5( 4,x)	// when BGP_UCI_Component_PowerCable
#define BGP_UCI_POWERCABLE_RACKROW(x)		_B4( 8,x)	// 0..F
#define BGP_UCI_POWERCABLE_RACKCOLUMN(x)	_B4(12,x)	// 0..F



typedef struct BGP_UCI_PowerModule_t
  {				// "Rxy-B-Pp": R<RackRow><RackColumn>-B-P<PowerModule>
    unsigned Component	 : 5;	// when BGP_UCI_Component_PowerModule
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned PowerModule : 3;	// 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
    unsigned _zero	 : 16;	// zero's
  }
  BGP_UCI_PowerModule_t;

#define BGP_UCI_POWERMODULE_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_PowerModule
#define BGP_UCI_POWERMODULE_RACKROW(x)		_B4( 8,x)	// 0..F
#define BGP_UCI_POWERMODULE_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_POWERMODULE_POWERMODULE(x)	_B3(15,x)	// 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)


typedef struct BGP_UCI_ClockCard_t
  {				// "Rxy-K": R<RackRow><RackColumn>-K
    unsigned Component	 : 5;	// when BGP_UCI_Component_ClockCard
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned _zero	 : 19;	// zero's
  }
  BGP_UCI_ClockCard_t;

// (comment corrected: these are ClockCard accessors, not PowerModule)
#define BGP_UCI_CLOCKCARD_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_ClockCard
#define BGP_UCI_CLOCKCARD_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_CLOCKCARD_RACKCOLUMN(x)	_B4(12,x)	// 0..F



typedef struct BGP_UCI_FanAssembly_t
  {				// "Rxy-Mm-Aa": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>
    unsigned Component	 : 5;	// when BGP_UCI_Component_FanAssembly
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned FanAssembly : 4;	// 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
    unsigned _zero	 : 14;	// zero's
  }
  BGP_UCI_FanAssembly_t;

#define BGP_UCI_FANASSEMBLY_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_FanAssembly
#define BGP_UCI_FANASSEMBLY_RACKROW(x)		_B4( 8,x)	// 0..F
#define BGP_UCI_FANASSEMBLY_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_FANASSEMBLY_MIDPLANE(x)		_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_FANASSEMBLY_FANASSEMBLY(x)	_B4(17,x)	// 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)



typedef struct BGP_UCI_Fan_t
  {				// "Rxy-Mm-Aa-Ff": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>-F<Fan>
    unsigned Component	 : 5;	// when BGP_UCI_Component_Fan
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned FanAssembly : 4;	// 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
    unsigned Fan	 : 2;	// 0..2 (0=Tailstock, 2=Midplane)
    unsigned _zero	 : 12;	// zero's
  }
  BGP_UCI_Fan_t;

#define BGP_UCI_FAN_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_Fan
#define BGP_UCI_FAN_RACKROW(x)		_B4( 8,x)	// 0..F
#define BGP_UCI_FAN_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_FAN_MIDPLANE(x)		_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_FAN_FANASSEMBLY(x)	_B4(17,x)	// 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
#define BGP_UCI_FAN_FAN(x)		_B2(19,x)	// 0..2 (0=Tailstock, 2=Midplane)

typedef struct BGP_UCI_ServiceCard_t
  {				// "Rxy-Mm-S": R<RackRow><RackColumn>-M<Midplane>-S
    unsigned Component	 : 5;	// when BGP_UCI_Component_ServiceCard
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top (Master ServiceCard in M0)
    unsigned _zero	 : 18;	// zero's
  }
  BGP_UCI_ServiceCard_t;

#define BGP_UCI_SERVICECARD_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_ServiceCard
#define BGP_UCI_SERVICECARD_RACKROW(x)		_B4( 8,x)	// 0..F
#define BGP_UCI_SERVICECARD_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_SERVICECARD_MIDPLANE(x)		_B1(13,x)	// 0=Bottom, 1=Top (Master ServiceCard in M0)



typedef struct BGP_UCI_LinkCard_t
  {				// "Rxy-Mm-Ll": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>
    unsigned Component	 : 5;	// when BGP_UCI_Component_LinkCard
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned LinkCard	 : 2;	// 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
    unsigned _zero	 : 16;	// zero's
  }
  BGP_UCI_LinkCard_t;

#define BGP_UCI_LINKCARD_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_LinkCard
#define BGP_UCI_LINKCARD_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_LINKCARD_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_LINKCARD_MIDPLANE(x)	_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_LINKCARD_LINKCARD(x)	_B2(15,x)	// 0..3: 0=BF, 1=TF, 2=BR, 3=TR)



typedef struct BGP_UCI_LinkChip_t
  {				// "Rxy-Mm-Ll-Uu": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-U<LinkChip>
    unsigned Component	 : 5;	// when BGP_UCI_Component_LinkChip
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned LinkCard	 : 2;	// 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
    unsigned LinkChip	 : 3;	// 00..05: left to right from Front
    unsigned _zero	 : 13;	// zero's
  }
  BGP_UCI_LinkChip_t;

#define BGP_UCI_LINKCHIP_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_LinkChip
#define BGP_UCI_LINKCHIP_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_LINKCHIP_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_LINKCHIP_MIDPLANE(x)	_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_LINKCHIP_LINKCARD(x)	_B2(15,x)	// 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
#define BGP_UCI_LINKCHIP_LINKCHIP(x)	_B3(18,x)	// 00..05: left to right from Front

typedef struct BGP_UCI_LinkPort_t
  {				// "Rxy-Mm-Ll-Jjj": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-J<LinkPort>
    unsigned Component	 : 5;	// when BGP_UCI_Component_LinkPort
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned LinkCard	 : 2;	// 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
    unsigned LinkPort	 : 4;	// 00..15: left to right from Front
    unsigned _zero	 : 12;	// zero's
  }
  BGP_UCI_LinkPort_t;

#define BGP_UCI_LINKPORT_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_LinkPort
#define BGP_UCI_LINKPORT_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_LINKPORT_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_LINKPORT_MIDPLANE(x)	_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_LINKPORT_LINKCARD(x)	_B2(15,x)	// 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
#define BGP_UCI_LINKPORT_LINKPORT(x)	_B4(19,x)	// 00..15: left to right from Front


typedef struct BGP_UCI_NodeCard_t
  {				// "Rxy-Mm-Nnn": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>
    unsigned Component	 : 5;	// when BGP_UCI_Component_NodeCard
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned NodeCard	 : 4;	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
    unsigned _zero	 : 14;	// zero's
  }
  BGP_UCI_NodeCard_t;

#define BGP_UCI_NODECARD_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_NodeCard
#define BGP_UCI_NODECARD_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_NODECARD_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_NODECARD_MIDPLANE(x)	_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_NODECARD_NODECARD(x)	_B4(17,x)	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)



typedef struct BGP_UCI_ComputeCard_t
  {				// "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
    unsigned Component	 : 5;	// when BGP_UCI_Component_ComputeCard
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned NodeCard	 : 4;	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
    unsigned ComputeCard : 6;	// 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
    unsigned _zero	 : 8;	// zero's
  }
  BGP_UCI_ComputeCard_t;

#define BGP_UCI_COMPUTECARD_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_ComputeCard
#define BGP_UCI_COMPUTECARD_RACKROW(x)		_B4( 8,x)	// 0..F
#define BGP_UCI_COMPUTECARD_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_COMPUTECARD_MIDPLANE(x)		_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_COMPUTECARD_NODECARD(x)		_B4(17,x)	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
#define BGP_UCI_COMPUTECARD_COMPUTECARD(x)	_B6(23,x)	// 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)


typedef struct BGP_UCI_IOCard_t
  {				// "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
    unsigned Component	 : 5;	// when BGP_UCI_Component_IOCard
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned NodeCard	 : 4;	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
    unsigned ComputeCard : 6;	// 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
    unsigned _zero	 : 8;	// zero's
  }
  BGP_UCI_IOCard_t;

#define BGP_UCI_IOCARD_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_IOCard
#define BGP_UCI_IOCARD_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_IOCARD_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_IOCARD_MIDPLANE(x)	_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_IOCARD_NODECARD(x)	_B4(17,x)	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
#define BGP_UCI_IOCARD_COMPUTECARD(x)	_B6(23,x)	// 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)



typedef struct BGP_UCI_DDRChip_t
  {				// "Rxy-Mm-Nnn-Jxx-Uuu": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>-U<DDRChip>
    unsigned Component	 : 5;	// when BGP_UCI_Component_DDRChip
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned NodeCard	 : 4;	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
    unsigned ComputeCard : 6;	// 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
    unsigned DDRChip	 : 5;	// 00..20
    unsigned _zero	 : 3;	// zero's
  }
  BGP_UCI_DDRChip_t;

#define BGP_UCI_DDRCHIP_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_DDRChip
#define BGP_UCI_DDRCHIP_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_DDRCHIP_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_DDRCHIP_MIDPLANE(x)	_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_DDRCHIP_NODECARD(x)	_B4(17,x)	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
#define BGP_UCI_DDRCHIP_COMPUTECARD(x)	_B6(23,x)	// 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
#define BGP_UCI_DDRCHIP_DDRCHIP(x)	_B5(28,x)	// 00..20


typedef struct BGP_UCI_ENetConnector_t
  {				// "Rxy-Mm-Nnn-ENe": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-EN<EN>
    unsigned Component	 : 5;	// when BGP_UCI_Component_ENetConnector
    unsigned RackRow	 : 4;	// 0..F
    unsigned RackColumn	 : 4;	// 0..F
    unsigned Midplane	 : 1;	// 0=Bottom, 1=Top
    unsigned NodeCard	 : 4;	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
    unsigned EN		 : 1;	// 0..1 (Equal to IOCard number)
    unsigned _zero	 : 13;	// zero's
  }
  BGP_UCI_ENetConnector_t;

#define BGP_UCI_ENETCONNECTOR_COMPONENT(x)	_B5( 4,x)	// when BGP_UCI_Component_ENetConnector
#define BGP_UCI_ENETCONNECTOR_RACKROW(x)	_B4( 8,x)	// 0..F
#define BGP_UCI_ENETCONNECTOR_RACKCOLUMN(x)	_B4(12,x)	// 0..F
#define BGP_UCI_ENETCONNECTOR_MIDPLANE(x)	_B1(13,x)	// 0=Bottom, 1=Top
#define BGP_UCI_ENETCONNECTOR_NODECARD(x)	_B4(17,x)	// 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
#define BGP_UCI_ENETCONNECTOR_ENETCONNECTOR(x)	_B1(18,x)	// 0..1 (Equal to IOCard number)



// Overlay union: interpret the raw 32-bit UCI word as any component type.
typedef union TBGP_UniversalComponentIdentifier
  {
    uint32_t                  UCI;
    BGP_UCI_Rack_t            Rack;
    BGP_UCI_Midplane_t        Midplane;
    BGP_UCI_BulkPowerSupply_t BulkPowerSupply;
    BGP_UCI_PowerCable_t      PowerCable;
    BGP_UCI_PowerModule_t     PowerModule;
    BGP_UCI_ClockCard_t       ClockCard;
    BGP_UCI_FanAssembly_t     FanAssembly;
    BGP_UCI_Fan_t             Fan;
    BGP_UCI_ServiceCard_t     ServiceCard;
    BGP_UCI_LinkCard_t        LinkCard;
    BGP_UCI_LinkChip_t        LinkChip;
    BGP_UCI_LinkPort_t        LinkPort;
    BGP_UCI_NodeCard_t        NodeCard;
    BGP_UCI_ComputeCard_t     ComputeCard;
    BGP_UCI_IOCard_t          IOCard;
    BGP_UCI_DDRChip_t         DDRChip;
    BGP_UCI_ENetConnector_t   ENetConnector;
  }
  BGP_UniversalComponentIdentifier;



#define BGP_PERSONALITY_VERSION (0x0A)

#define BGP_DEFAULT_FREQ (850)

// Personality.Kernel_Config.ProcessConfig encodings (see the Process Config
// comment in BGP_Personality_Kernel_t for the per-byte layout)
#define BGP_PERS_PROCESSCONFIG_DIAGS	(0xFF000000)	// Diagnostic Mode: All Cores Enabled and Privileged in Process 0
#define BGP_PERS_PROCESSCONFIG_SMP	(0x0F000000)	// All Cores Enabled User-Space in Process 0
#define BGP_PERS_PROCESSCONFIG_VNM	(0x08040201)	// 4 Single-Core Processes (a.k.a. Virtual Nodes)
#define BGP_PERS_PROCESSCONFIG_2x2	  (0x0C030000)	// 2 Processes of 2 Cores each in same DP unit
#define BGP_PERS_PROCESSCONFIG_2x2_CROSS1 (0x09060000)	// 2 Processes of 2 Cores in different DP units
#define BGP_PERS_PROCESSCONFIG_2x2_CROSS2 (0x0A050000)	// 2 Processes of 2 Cores in different DP units
#define BGP_PERS_PROCESSCONFIG_3PLUS1	  (0x0E010000)	// 3 Cores in one Processes, 4th Core in Separate Process
#define BGP_PERS_PROCESSCONFIG_DEFAULT	  (BGP_PERS_PROCESSCONFIG_DIAGS)


// Personality.Kernel_Config.RASPolicy
#define BGP_PERS_RASPOLICY_VERBOSITY(x)	_B2( 1,x)	// Verbosity as shown below
#define BGP_PERS_RASPOLICY_MINIMAL	BGP_PERS_RASPOLICY_VERBOSITY(0)	// Benchmarking Level of Capture and Reporting
#define BGP_PERS_RASPOLICY_NORMAL	BGP_PERS_RASPOLICY_VERBOSITY(1)	// Normal Production Level of Capture and Reporting
#define BGP_PERS_RASPOLICY_VERBOSE	BGP_PERS_RASPOLICY_VERBOSITY(2)	// Manufacturing Test and Diagnostics
#define BGP_PERS_RASPOLICY_EXTREME	BGP_PERS_RASPOLICY_VERBOSITY(3)	// Report Every Event Immediately - Thresholds set to 1
#define BGP_PERS_RASPOLICY_FATALEXIT	_BN( 2)		// Fatal is Fatal, so exit.

#define BGP_PERS_RASPOLICY_DEFAULT	(BGP_PERS_RASPOLICY_VERBOSE | BGP_PERS_RASPOLICY_FATALEXIT)


#define BGP_PERSONALITY_LEN_NFSDIR (32)	// 32bytes

#define BGP_PERSONALITY_LEN_SECKEY (32)	// 32bytes

// Personality.NodeConfig Driver Enables and Configurations
// (one _BN() bit per device/feature; see the DEFAULT combinations below)
#define BGP_PERS_ENABLE_Simulation	_BN( 0)	// Running on VHDL Simulation
#define BGP_PERS_ENABLE_LockBox		_BN( 1)
#define BGP_PERS_ENABLE_BIC		_BN( 2)
#define BGP_PERS_ENABLE_DDR		_BN( 3)	// DDR Controllers (not Fusion DDR model)
#define BGP_PERS_ENABLE_LoopBack	_BN( 4)	// LoopBack: Internal TS/TR or SerDes Loopback
#define BGP_PERS_ENABLE_GlobalInts	_BN( 5)
#define BGP_PERS_ENABLE_Collective	_BN( 6)	// Enable Collective Network
#define BGP_PERS_ENABLE_Torus		_BN( 7)
#define BGP_PERS_ENABLE_TorusMeshX	_BN( 8)	// Torus is a Mesh in the X-dimension
#define BGP_PERS_ENABLE_TorusMeshY	_BN( 9)	// Torus is a Mesh in the Y-dimension
#define BGP_PERS_ENABLE_TorusMeshZ	_BN(10)	// Torus is a Mesh in the Z-dimension
#define BGP_PERS_ENABLE_TreeA		_BN(11)	// Enable Collective Network A-link
#define BGP_PERS_ENABLE_TreeB		_BN(12)	// Enable Collective Network B-link
#define BGP_PERS_ENABLE_TreeC		_BN(13)	// Enable Collective Network C-link
#define BGP_PERS_ENABLE_DMA		_BN(14)
#define BGP_PERS_ENABLE_SerDes		_BN(15)
#define BGP_PERS_ENABLE_UPC		_BN(16)
#define BGP_PERS_ENABLE_EnvMon		_BN(17)
#define BGP_PERS_ENABLE_Ethernet	_BN(18)
#define BGP_PERS_ENABLE_JTagLoader	_BN(19)	// Converse with JTag Host to load kernel
#define BGP_PERS_ENABLE_MailBoxReceive	BGP_PERS_ENABLE_JTagLoader
#define BGP_PERS_ENABLE_PowerSave	_BN(20)	// Turn off unused devices (Eth on CN, TS on ION)
#define BGP_PERS_ENABLE_FPU		_BN(21)	// Enable Double-Hummers (not supported in EventSim)
#define BGP_PERS_ENABLE_StandAlone	_BN(22)	// Disable "CIOD" interface, Requires Collective!
#define BGP_PERS_ENABLE_TLBMisses	_BN(23)	// TLB Misses vs Wasting Memory (see bgp_AppSetup.c)
#define BGP_PERS_ENABLE_Mambo		_BN(24)	// Running under Mambo? Used by Linux
#define BGP_PERS_ENABLE_TreeBlast	_BN(25)	// Enable Tree "Blast" mode
#define BGP_PERS_ENABLE_BlindStacks	_BN(26)	// For "XB" Tests, Lock 16K Stacks in Blind Device
#define BGP_PERS_ENABLE_CNK_Malloc	_BN(27)	// Enable Malloc Support in CNK.
#define BGP_PERS_ENABLE_Reproducibility	_BN(28)	// Enable Cycle Reproducibility
#define BGP_PERS_ENABLE_HighThroughput	_BN(29)	// Enable high throughput computing mode
#define BGP_PERS_ENABLE_DiagnosticsMode	_BN(30)	// Enable diagnostics mode

// Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back)
//  This overrides most L1, L2, and Snoop settings.  Carefull!!
#define BGP_PERS_ENABLE_BGLMODE		_BN(31)	// (not yet fully implemented)

// Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave
#define BGP_PERS_NODECONFIG_DEFAULT (BGP_PERS_ENABLE_Simulation |\
                                     BGP_PERS_ENABLE_LockBox |\
                                     BGP_PERS_ENABLE_BIC |\
                                     BGP_PERS_ENABLE_DDR |\
                                     BGP_PERS_ENABLE_LoopBack |\
                                     BGP_PERS_ENABLE_GlobalInts |\
                                     BGP_PERS_ENABLE_Collective |\
                                     BGP_PERS_ENABLE_Torus |\
                                     BGP_PERS_ENABLE_UPC |\
                                     BGP_PERS_ENABLE_EnvMon |\
                                     BGP_PERS_ENABLE_FPU |\
                                     BGP_PERS_ENABLE_StandAlone)

// Default Setup for Hardware:
//   Supports Stand-Alone CNA Applications.
//   Bootloader-Extensions and XB's must turn-off JTagLoader
#define BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (BGP_PERS_ENABLE_JTagLoader |\
                                                  BGP_PERS_ENABLE_LockBox |\
                                                  BGP_PERS_ENABLE_BIC |\
                                                  BGP_PERS_ENABLE_DDR |\
                                                  BGP_PERS_ENABLE_GlobalInts |\
                                                  BGP_PERS_ENABLE_Collective |\
                                                  BGP_PERS_ENABLE_SerDes |\
                                                  BGP_PERS_ENABLE_UPC |\
                                                  BGP_PERS_ENABLE_EnvMon |\
                                                  BGP_PERS_ENABLE_FPU |\
                                                  BGP_PERS_ENABLE_StandAlone)

// these fields are defined by the control system depending on compute/io node
//   BGP_PERS_ENABLE_Torus |
//   BGP_PERS_ENABLE_TorusMeshX |
//   BGP_PERS_ENABLE_TorusMeshY |
//   BGP_PERS_ENABLE_TorusMeshZ |



// Personality.L1Config: Controls and Settings for L1 Cache
#define BGP_PERS_L1CONFIG_L1I		_BN( 0)	// L1 Enabled for Instructions
#define BGP_PERS_L1CONFIG_L1D		_BN( 1)	// L1 Enabled for Data
#define BGP_PERS_L1CONFIG_L1SWOA	_BN( 2)	// L1 Store WithOut Allocate
#define BGP_PERS_L1CONFIG_L1Recovery	_BN( 3)	// L1 Full Recovery Mode
#define BGP_PERS_L1CONFIG_L1WriteThru	_BN( 4)	// L1 Write-Thru (not svc_host changeable (yet?))
#define BGP_PERS_L1CONFIG_DO_L1ITrans	_BN( 5)	// Enable L1 Instructions Transient?
#define BGP_PERS_L1CONFIG_DO_L1DTrans	_BN( 6)	// Enable L1 Data Transient?
					// unused 9bits: 7..15
#define BGP_PERS_L1CONFIG_L1ITrans(x)	_B8(23,x)	// L1 Transient for Instructions in Groups of 16 Lines
#define BGP_PERS_L1CONFIG_L1DTrans(x)	_B8(31,x)	// L1 Transient for Data in Groups of 16 Lines

#define BGP_PERS_L1CONFIG_DEFAULT (BGP_PERS_L1CONFIG_L1I |\
                                   BGP_PERS_L1CONFIG_L1D |\
                                   BGP_PERS_L1CONFIG_L1SWOA |\
                                   BGP_PERS_L1CONFIG_L1Recovery |\
                                   BGP_PERS_L1CONFIG_L1WriteThru)

// Decoded view of the 32-bit L1 config word; field order mirrors the
// _BN()/_B8() bit positions defined above (MSB-first).
typedef union TBGP_Pers_L1Cfg
  {
    uint32_t l1cfg;
    struct {
      unsigned l1i         : 1;
      unsigned l1d         : 1;
      unsigned l1swoa      : 1;
      unsigned l1recovery  : 1;
      unsigned l1writethru : 1;
      unsigned do_l1itrans : 1;
      unsigned do_l1dtrans : 1;
      unsigned l1rsvd      : 9;
      unsigned l1itrans    : 8;
      unsigned l1dtrans    : 8;
    };
  }
  BGP_Pers_L1Cfg;

// Personality.L2Config: Controls and Settings for L2 and Snoop
#define BGP_PERS_L2CONFIG_L2I		_BN( 0)	// L2 Instruction Caching Enabled
#define BGP_PERS_L2CONFIG_L2D		_BN( 1)	// L2 Data Caching Enabled
#define BGP_PERS_L2CONFIG_L2PF		_BN( 2)	// L2 Automatic Prefetching Enabled
#define BGP_PERS_L2CONFIG_L2PFO		_BN( 3)	// L2 Optimistic Prefetching Enabled
#define BGP_PERS_L2CONFIG_L2PFA		_BN( 4)	// L2 Aggressive Prefetching Enabled (fewer deeper streams)
#define BGP_PERS_L2CONFIG_L2PFS		_BN( 5)	// L2 Aggressive Many-Stream Prefetching Enabled (deeper only when available buffers)
#define BGP_PERS_L2CONFIG_Snoop		_BN( 6)	// Just NULL Snoop Filter
#define BGP_PERS_L2CONFIG_SnoopCache	_BN( 7)	// Snoop Caches
#define BGP_PERS_L2CONFIG_SnoopStream	_BN( 8)	// Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata)
#define BGP_PERS_L2CONFIG_SnoopRange	_BN( 9)	// Snoop Range Filter when possible
#define BGP_PERS_L2CONFIG_BUG824LUMPY	_BN(10)	// BPC_BUGS 824: Fix with Lumpy Performance
#define BGP_PERS_L2CONFIG_BUG824SMOOTH	_BN(11)	// BPC_BUGS 824: Fix with Smooth Performance, but -12% Memory
#define BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12)	// Special for Snoop diagnostics.  See bgp_vmm.c
					// additional bits may be used for Snoop setting tweaks

// Default L2 Configuration:
//   L2 Enabled with Multi-Stream Aggressive Prefetching
//   Snoop Enabled with all filters except Range
#define BGP_PERS_L2CONFIG_DEFAULT (BGP_PERS_L2CONFIG_L2I |\
                                   BGP_PERS_L2CONFIG_L2D |\
                                   BGP_PERS_L2CONFIG_L2PF |\
                                   BGP_PERS_L2CONFIG_L2PFO |\
                                   BGP_PERS_L2CONFIG_L2PFS |\
                                   BGP_PERS_L2CONFIG_Snoop |\
                                   BGP_PERS_L2CONFIG_SnoopCache |\
                                   BGP_PERS_L2CONFIG_SnoopStream|\
                                   BGP_PERS_L2CONFIG_BUG824LUMPY)


// Personality.L3Config: Controls and Settings for L3
//   Note: Most bits match BGP_L3x_CTRL DCRs.
//         See arch/include/bpcore/bgl_l3_dcr.h
#define BGP_PERS_L3CONFIG_L3I		_BN( 0)	// L3 Enabled for Instructions
#define BGP_PERS_L3CONFIG_L3D		_BN( 1)	// L3 Enabled for Data
#define BGP_PERS_L3CONFIG_L3PFI		_BN( 2)	// Inhibit L3 Prefetch from DDR
#define BGP_PERS_L3CONFIG_DO_Scratch	_BN( 3)	// Set up Scratch?
#define BGP_PERS_L3CONFIG_DO_PFD0	_BN( 4)	// Adjust PFD0?
#define BGP_PERS_L3CONFIG_DO_PFD1	_BN( 5)	// Adjust PFD1?
#define BGP_PERS_L3CONFIG_DO_PFDMA	_BN( 6)	// Adjust PFDMA?
#define BGP_PERS_L3CONFIG_DO_PFQD	_BN( 7)	// Adjust PFQD?
					// 8..15 unused/available
#define BGP_PERS_L3CONFIG_Scratch(x)	_B4(19,x)	// Scratch 8ths: 0..8
#define BGP_PERS_L3CONFIG_PFD0(x)	_B3(22,x)	// Prefetch Depth for DP0
#define BGP_PERS_L3CONFIG_PFD1(x)	_B3(25,x)	// Prefetch Depth for DP1
#define BGP_PERS_L3CONFIG_PFDMA(x)	_B3(28,x)	// Prefetch Depth for DMA
#define BGP_PERS_L3CONFIG_PFQD(x)	_B3(31,x)	// Prefetch Queue Depth

// General L3 Configuration
// Decoded view of the 32-bit L3 config word; fields mirror the bit
// positions of the defines above (MSB-first).
typedef union TBGP_Pers_L3Cfg
  {
    uint32_t l3cfg;
    struct {
      unsigned l3i        : 1;
      unsigned l3d        : 1;
      unsigned l3pfi      : 1;
      unsigned do_scratch : 1;
      unsigned do_pfd0    : 1;
      unsigned do_pfd1    : 1;
      unsigned do_pfdma   : 1;
      unsigned do_pfqd    : 1;
      unsigned rsvd       : 8;
      unsigned scratch    : 4;
      unsigned pfd0       : 3;
      unsigned pfd1       : 3;
      unsigned pfdma      : 3;
      unsigned pfqd       : 3;
    };
  }
  BGP_Pers_L3Cfg;

// Default L3 Configuration:
//   L3 Enabled for Instructions and Data
//   No Prefetch Depth overrides, No Scratch, No Scrambling.
#define BGP_PERS_L3CONFIG_DEFAULT (BGP_PERS_L3CONFIG_L3I |\
                                   BGP_PERS_L3CONFIG_L3D |\
                                   BGP_PERS_L3CONFIG_DO_PFDMA |\
                                   BGP_PERS_L3CONFIG_PFDMA(4))


// L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users)
#define BGP_PERS_L3SELECT_DO_CacheSel	_BN( 0)	// Adjust Cache Select setting?
#define BGP_PERS_L3SELECT_DO_BankSel	_BN( 1)	// Adjust Bank Select setting?
#define BGP_PERS_L3SELECT_Scramble	_BN( 2)	// L3 Scramble
#define BGP_PERS_L3SELECT_PFby2		_BN( 3)	// Prefetch by 2 if set, else by 1 (default) if clear.
#define BGP_PERS_L3SELECT_CacheSel(x)	_B5( 8,x)	// PhysAddr Bit for L3 Selection (0..26)
#define BGP_PERS_L3SELECT_BankSel(x)	_B5(13,x)	// PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel.

typedef union TBGP_Pers_L3Select
  {
    uint32_t l3select;
    struct {
      unsigned do_CacheSel : 1;
      unsigned do_BankSel  : 1;
      unsigned l3Scramble  : 1;
      unsigned l3_PF_by2   : 1;		// default is PreFetch by 1.
      unsigned CacheSel    : 5;		// Physical Address Bit for L3 Selection (0..26)
      unsigned BankSel     : 5;		// 0..26 Must be strictly greater than CacheSel.
      unsigned rsvd        : 18;
    };
  }
  BGP_Pers_L3Select;

// Default L3 Selection Configuration: Disable overrides, but set h/w default values.
#define BGP_PERS_L3SELECT_DEFAULT (BGP_PERS_L3SELECT_CacheSel(21) |\
                                   BGP_PERS_L3SELECT_BankSel(26))

// Tracing Masks and default trace configuration
#define BGP_TRACE_CONFIG	_BN( 0)	// Display Encoded personality config on startup
#define BGP_TRACE_ENTRY		_BN( 1)	// Function enter and exit
#define BGP_TRACE_INTS		_BN( 2)	// Standard Interrupt Dispatch
#define BGP_TRACE_CINTS		_BN( 3)	// Critical Interrupt Dispatch
#define BGP_TRACE_MCHK		_BN( 4)	// Machine Check Dispatch
#define BGP_TRACE_SYSCALL	_BN( 5)	// System Calls
#define BGP_TRACE_VMM		_BN( 6)	// Virtual Memory Manager
#define BGP_TRACE_DEBUG		_BN( 7)	// Debug Events (app crashes etc)
#define BGP_TRACE_TORUS		_BN( 8)	// Torus Init
#define BGP_TRACE_TREE		_BN( 9)	// Tree Init
#define BGP_TRACE_GLOBINT	_BN(10)	// Global Interrupts
#define BGP_TRACE_DMA		_BN(11)	// DMA Setup
#define BGP_TRACE_SERDES	_BN(12)	// SerDes Init
#define BGP_TRACE_TESTINT	_BN(13)	// Test Interface, ECID, Config
#define BGP_TRACE_ETHTX		_BN(14)	// Ethernet Transmit
#define BGP_TRACE_ETHRX		_BN(15)	// Ethernet Receive
#define BGP_TRACE_POWER		_BN(16)	// Power Control
#define BGP_TRACE_PROCESS	_BN(17)	// Process/Thread Mapping
#define BGP_TRACE_EXIT_SUM	_BN(18)	// Report Per-Core Interrupt and Error Summary on exit()
#define BGP_TRACE_SCHED		_BN(19)	// Report Scheduler Information
#define BGP_TRACE_RAS		_BN(20)	// Report RAS Events (in addition to sending to Host)
#define BGP_TRACE_ECID		_BN(21)	// Report UCI and ECID on boot
#define BGP_TRACE_FUTEX		_BN(22)	// Trace Futex operations
#define BGP_TRACE_MemAlloc	_BN(23)	// Trace MMAP and Shared Memory operations
#define BGP_TRACE_WARNINGS	_BN(30)	// Trace Warnings
#define BGP_TRACE_VERBOSE	_BN(31)	// Verbose Tracing Modifier
_BN(31) // Verbose Tracing Modifier + +// Enable tracking of Regression Suite coverage and report UCI+ECID on boot +#define BGP_PERS_TRACE_DEFAULT (BGP_TRACE_CONFIG | BGP_TRACE_ECID) + + +typedef struct BGP_Personality_Kernel_t + { + uint32_t UniversalComponentIdentifier; // see include/common/bgp_ras.h + + uint32_t FreqMHz; // Clock_X1 Frequency in MegaHertz (eg 1000) + + uint32_t RASPolicy; // Verbosity level, and other RAS Reporting Controls + + // Process Config: + // Each byte represents a process (1 to 4 processes supported) + // No core can be assigned to more than 1 process. + // Cores assigned to no process are disabled. + // Cores with in a process share the same address space. + // Separate processes have distinct address spaces. + // Within each process (0 to 4 cores assigned to a process): + // Lower nibble is bitmask of which core belongs to that process. + // Upper nibble is bitmask whether that thread is privileged or user. + // Processes with zero cores do not exist. + // E.g., for Diagnostics, we use 0xFF000000, which means + // that all 4 cores run privileged in process 0. 
+ uint32_t ProcessConfig; + + uint32_t TraceConfig; // Kernel Tracing Enables + uint32_t NodeConfig; // Kernel Driver Enables + uint32_t L1Config; // L1 Config and setup controls + uint32_t L2Config; // L2 and Snoop Config and setup controls + uint32_t L3Config; // L3 Config and setup controls + uint32_t L3Select; // L3 Cache and Bank Selection controls + + uint32_t SharedMemMB; // Memory to Reserve for Sharing among Processes + + uint32_t ClockStop0; // Upper 11Bits of ClockStop, enabled if Non-zero + uint32_t ClockStop1; // Lower 32Bits of ClockStop, enabled if Non-zero + } + BGP_Personality_Kernel_t; + + +// Defaults for DDR Config +#define BGP_PERS_DDR_PBX0_DEFAULT (0x411D1512) // PBX DCRs setting (in IBM bit numbering) +#define BGP_PERS_DDR_PBX1_DEFAULT (0x40000000) // PBX DCRs setting (in IBM bit numbering) +#define BGP_PERS_DDR_MemConfig0_DEFAULT (0x81fc4080) // MemConfig +#define BGP_PERS_DDR_MemConfig1_DEFAULT (0x0C0ff800) // MemConfig +#define BGP_PERS_DDR_ParmCtl0_DEFAULT (0x3216c008) // Parm Control +#define BGP_PERS_DDR_ParmCtl1_DEFAULT (0x4168c323) // Parm Control +#define BGP_PERS_DDR_MiscCtl0_DEFAULT (0) // Misc. Control +#define BGP_PERS_DDR_MiscCtl1_DEFAULT (0) // Misc. 
Control +#define BGP_PERS_DDR_CmdBufMode0_DEFAULT (0x00400fdf) // Command Buffer Mode +#define BGP_PERS_DDR_CmdBufMode1_DEFAULT (0xffc80600) // Command Buffer Mode +#define BGP_PERS_DDR_RefrInterval0_DEFAULT (0xD1000002) // Refresh Interval +#define BGP_PERS_DDR_RefrInterval1_DEFAULT (0x04000000) // Refresh Interval +#define BGP_PERS_DDR_ODTCtl0_DEFAULT (0) // ODT Control +#define BGP_PERS_DDR_ODTCtl1_DEFAULT (0) // ODT Control +#define BGP_PERS_DDR_DataStrobeCalib0_DEFAULT (0x08028a64) // Data Strobe Calibration +#define BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805) // Data Strobe Calibration +#define BGP_PERS_DDR_DQSCtl_DEFAULT (0x00000168) // DQS Control +#define BGP_PERS_DDR_Throttle_DEFAULT (0) // DDR Throttle +//1#define BGP_PERS_DDR_DDRSizeMB_DEFAULT (4096) // Total DDR size in MegaBytes (512MB - 16384MB). +#define BGP_PERS_DDR_DDRSizeMB_DEFAULT (1024) // Total DDR size in MegaBytes (512MB - 16384MB). +//1#define BGP_PERS_DDR_Chips_DEFAULT (0x0B) // Type of DDR chips +#define BGP_PERS_DDR_Chips_DEFAULT (0x09) // Type of DDR chips +#define BGP_PERS_DDR_CAS_DEFAULT (4) // CAS Latency (3, 4, or 5) + + +#define BGP_PERS_DDRFLAGS_ENABLE_Scrub _BN(0) // Enable DDR Slow Scrub when 1 + +// DDRFLAGS default: Enable Slow Scrub. +#define BGP_PERS_DDRFLAGS_DEFAULT (BGP_PERS_DDRFLAGS_ENABLE_Scrub) + +#define BGP_PERS_SRBS0_DEFAULT (0) +#define BGP_PERS_SRBS1_DEFAULT (0) + +typedef struct BGP_Personality_DDR_t + { + uint32_t DDRFlags; // Misc. Flags and Settings + uint32_t SRBS0; // Controller 0 SRBS/CK Settings + uint32_t SRBS1; // Controller 1 SRBS/CK Settings + uint32_t PBX0; // PBX DCRs setting (in IBM bit numbering) + uint32_t PBX1; // PBX DCRs setting (in IBM bit numbering) + uint32_t MemConfig0; // MemConfig + uint32_t MemConfig1; // MemConfig + uint32_t ParmCtl0; // Parm Control + uint32_t ParmCtl1; // Parm Control + uint32_t MiscCtl0; // Misc. Control + uint32_t MiscCtl1; // Misc. 
Control + uint32_t CmdBufMode0; // Command Buffer Mode + uint32_t CmdBufMode1; // Command Buffer Mode + uint32_t RefrInterval0; // Refresh Interval + uint32_t RefrInterval1; // Refresh Interval + uint32_t ODTCtl0; // ODT Control + uint32_t ODTCtl1; // ODT Control + uint32_t DataStrobeCalib0; // Data Strobe Calibration + uint32_t DataStrobeCalib1; // Data Strobe Calibration + uint32_t DQSCtl; // DQS Control + uint32_t Throttle; // DDR Throttle + uint16_t DDRSizeMB; // Total DDR size in MegaBytes (512MB - 16384MB). + uint8_t Chips; // Type of DDR chips + uint8_t CAS; // CAS Latency (3, 4, or 5) + } + BGP_Personality_DDR_t; + + +typedef struct BGP_Personality_Networks_t + { + uint32_t BlockID; // a.k.a. PartitionID + + uint8_t Xnodes, + Ynodes, + Znodes, + Xcoord, + Ycoord, + Zcoord; + + // PSet Support + uint16_t PSetNum; + uint32_t PSetSize; + uint32_t RankInPSet; + + uint32_t IOnodes; + uint32_t Rank; // Rank in Block (or Partition) + uint32_t IOnodeRank; // Rank (and therefore P2P Addr) of my I/O Node + uint16_t TreeRoutes[ 16 ]; + } + BGP_Personality_Networks_t; + + +typedef struct BGP_IP_Addr_t + { + // IPv6 Addresses are 16 bytes, where the + // lower 4 (indices 12-15) can be used for IPv4 address. 
+ uint8_t octet[ 16 ]; + } + BGP_IP_Addr_t; + + +typedef struct BGP_Personality_Ethernet_t + { + uint16_t MTU; // Initial emac MTU size + uint8_t EmacID[6]; // MAC address for emac + BGP_IP_Addr_t IPAddress; // IPv6/IPv4 address of this node + BGP_IP_Addr_t IPNetmask; // IPv6/IPv4 netmask + BGP_IP_Addr_t IPBroadcast; // IPv6/IPv4 broadcast address + BGP_IP_Addr_t IPGateway; // IPv6/IPv4 initial gateway (zero if none) + BGP_IP_Addr_t NFSServer; // IPv6/IPv4 NFS system software server address + BGP_IP_Addr_t serviceNode; // IPv6/IPv4 address of service node + + // NFS mount info + char NFSExportDir[BGP_PERSONALITY_LEN_NFSDIR]; + char NFSMountDir[BGP_PERSONALITY_LEN_NFSDIR]; + + // Security Key for Service Node authentication + uint8_t SecurityKey[BGP_PERSONALITY_LEN_SECKEY ]; + } + BGP_Personality_Ethernet_t; + + + +#define BGP_PERS_BLKCFG_IPOverCollective _BN(31) +#define BGP_PERS_BLKCFG_IPOverTorus _BN(30) +#define BGP_PERS_BLKCFG_IPOverCollectiveVC _BN(29) +#define BGP_PERS_BLKCFG_CIOModeSel(x) _B2(28,x) +#define BGP_PERS_BLKCFG_bgsysFSSel(x) _B3(26,x) +#define BGP_PERS_BLKCFG_CIOMode_Full 0 +#define BGP_PERS_BLKCFG_CIOMode_MuxOnly 1 +#define BGP_PERS_BLKCFG_CIOMode_None 2 +#define BGP_PERS_BLKCFG_bgsys_NFSv3 0 +#define BGP_PERS_BLKCFG_bgsys_NFSv4 1 +#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \ + BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3)) + +typedef struct TBGP_Personality_t + { + uint16_t CRC; + uint8_t Version; + uint8_t PersonalitySizeWords; + + BGP_Personality_Kernel_t Kernel_Config; + + BGP_Personality_DDR_t DDR_Config; + + BGP_Personality_Networks_t Network_Config; + + BGP_Personality_Ethernet_t Ethernet_Config; + + uint8_t Block_Config; + uint8_t padd[7]; // Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr) + // to simplify jtag operations. See issue #140. + } + BGP_Personality_t; + + +// Define a static initializer for default configuration. 
(DEFAULTS FOR SIMULATION) +// This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c +#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \ + 0, /* CRC */ \ + BGP_PERSONALITY_VERSION, /* Version */ \ + (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \ + { /* BGP_Personality_Kernel_t: */ \ + 0, /* MachineLocation */ \ + BGP_DEFAULT_FREQ, /* FreqMHz */ \ + BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \ + BGP_PERS_PROCESSCONFIG_DEFAULT, /* ProcessConfig */ \ + BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \ + BGP_PERS_NODECONFIG_DEFAULT, /* NodeConfig */ \ + BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \ + BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \ + BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \ + BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \ + 0, /* SharedMemMB */ \ + 0, /* ClockStop0 */ \ + 0 /* ClockStop1 */ \ + }, \ + { /* BGP_Personality_DDR_t: */ \ + BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \ + BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \ + BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \ + BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \ + BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \ + BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \ + BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \ + BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \ + BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \ + BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \ + BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \ + BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \ + BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \ + BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \ + BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \ + BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \ + BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \ + BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \ + BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \ + BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \ + BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \ + 
BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \ + BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \ + BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \ + }, \ + { /* BGP_Personality_Networks_t: */ \ + 0, /* BlockID */ \ + 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \ + 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \ + 0, /* PSetNum */ \ + 0, /* PSetSize */ \ + 0, /* RankInPSet */ \ + 0, /* IOnodes */ \ + 0, /* Rank */ \ + 0, /* IOnodeRank */ \ + { 0, } /* TreeRoutes[ 16 ] */ \ + }, \ + { /* BGP_Personality_Ethernet_t: */ \ + 1536, /* mtu */ \ + { 0, }, /* EmacID[6] */ \ + { { 0x00,0x00,0x00,0x00, /* IPAddress */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0xFF,0xFF,0xFF,0x70 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPGateway */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* NFSServer */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* serviceNode */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + "", /* NFSExportDir[32] */ \ + "", /* NFSMountDir[32] */ \ + { 0x00, } /* SecurityKey[32] */ \ + }, \ + 0, /* Block_Config */ \ + { 0, } /* padd[7] */ \ + } + + +// Define a static initializer for default configuration. 
(DEFAULTS FOR HARDWARE) +// This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c +#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \ + 0, /* CRC */ \ + BGP_PERSONALITY_VERSION, /* Version */ \ + (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \ + { /* BGP_Personality_Kernel_t: */ \ + 0, /* MachineLocation */ \ + BGP_DEFAULT_FREQ, /* FreqMHz */ \ + BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \ + BGP_PERS_PROCESSCONFIG_SMP, /* ProcessConfig */ \ + BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \ + BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE, /* NodeConfig */ \ + BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \ + BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \ + BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \ + BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \ + 0, /* SharedMemMB */ \ + 0, /* ClockStop0 */ \ + 0 /* ClockStop1 */ \ + }, \ + { /* BGP_Personality_DDR_t: */ \ + BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \ + BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \ + BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \ + BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \ + BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \ + BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \ + BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \ + BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \ + BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \ + BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \ + BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \ + BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \ + BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \ + BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \ + BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \ + BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \ + BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \ + BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \ + BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \ + BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \ + BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \ + 
BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \ + BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \ + BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \ + }, \ + { /* BGP_Personality_Networks_t: */ \ + 0, /* BlockID */ \ + 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \ + 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \ + 0, /* PSetNum */ \ + 0, /* PSetSize */ \ + 0, /* RankInPSet */ \ + 0, /* IOnodes */ \ + 0, /* Rank */ \ + 0, /* IOnodeRank */ \ + { 0, } /* TreeRoutes[ 16 ] */ \ + }, \ + { /* BGP_Personality_Ethernet_t: */ \ + 1536, /* mtu */ \ + { 0, }, /* EmacID[6] */ \ + { { 0x00,0x00,0x00,0x00, /* IPAddress */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0xFF,0xFF,0xFF,0x70 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPGateway */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* NFSServer */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* serviceNode */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + "", /* NFSExportDir[32] */ \ + "", /* NFSMountDir[32] */ \ + { 0x00, } /* SecurityKey[32] */ \ + }, \ + 0, /* Block_Config */ \ + { 0, } /* padd[7] */ \ + } + + + + +#endif // Add nothing below this line. diff --git a/arch/powerpc/boot/dts/bgp.dts b/arch/powerpc/boot/dts/bgp.dts new file mode 100644 index 00000000000000..855a00808fa15b --- /dev/null +++ b/arch/powerpc/boot/dts/bgp.dts @@ -0,0 +1,127 @@ +/* + * Device Tree Source for IBM BlueGene/P + * + * (C) Copyright IBM Corp. 
2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + + * Author: Chris Ward <tjcw@uk.ibm.com> + + * Josh Boyer <jwboyer@linux.vnet.ibm.com>, David Gibson <dwg@au1.ibm.com> + * + * Cloned from 'Ebony', and revised. + * + */ + +/dts-v1/; + +/ { + #address-cells = <2>; + #size-cells = <1>; + model = "ibm,bluegenep"; + compatible = "ibm,bluegenep"; + dcr-parent = <&{/cpus/cpu@0}>; + +/* aliases { + ethernet0 = &EMAC0; + }; +*/ + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + model = "PowerPC,450"; + reg = <0x00000000>; + clock-frequency = <850000000>; + timebase-frequency = <850000000>; + i-cache-line-size = <32>; + d-cache-line-size = <32>; + i-cache-size = <32768>; /* 32 kB */ + d-cache-size = <32768>; /* 32 kB */ + dcr-controller; + dcr-access-method = "native"; + }; + + cpu@1 { + device_type = "cpu"; + model = "PowerPC,450"; + reg = <0x00000000>; + clock-frequency = <850000000>; + timebase-frequency = <850000000>; + i-cache-line-size = <32>; + d-cache-line-size = <32>; + i-cache-size = <32768>; /* 32 kB */ + d-cache-size = <32768>; /* 32 kB */ + dcr-controller; + dcr-access-method = "native"; + }; + + cpu@2 { + device_type = "cpu"; + model = "PowerPC,450"; + reg = <0x00000000>; + clock-frequency = <850000000>; + timebase-frequency = <850000000>; + i-cache-line-size = <32>; + d-cache-line-size = <32>; + i-cache-size = 
<32768>; /* 32 kB */ + d-cache-size = <32768>; /* 32 kB */ + dcr-controller; + dcr-access-method = "native"; + }; + + cpu@3 { + device_type = "cpu"; + model = "PowerPC,450"; + reg = <0x00000000>; + clock-frequency = <850000000>; + timebase-frequency = <850000000>; + i-cache-line-size = <32>; + d-cache-line-size = <32>; + i-cache-size = <32768>; /* 32 kB */ + d-cache-size = <32768>; /* 32 kB */ + dcr-controller; + dcr-access-method = "native"; + }; + }; + + memory { + device_type = "memory"; + reg = <0x00000000 0x00000000 0x00000000>; // from wrapper + }; + + ibm,bluegene { + cns { // from wrapper + version = <0>; + size = <0>; + base-va = <0>; + base-pa = <0>; // assume <= 4G + services = <0>; + }; + personality { // from wrapper + version = <0>; + frequency = <850000000>; + }; + }; + + chosen { + bootargs = "console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608 rdinit=/sbin/init"; + + // the bgp wrapper locates a ramdisk and updates initrd-start/end + linux,initrd-start = <0>; + linux,initrd-end = <0>; + }; +}; diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index 965c237c122d78..23c6ccfdb5fbf5 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -222,6 +222,11 @@ asp834x-redboot) platformo="$object/fixed-head.o $object/redboot-83xx.o" binary=y ;; +bgp) + platformo="--section-start bgstart=0 $object/fixed-head.o $object/bgp.o" + link_address='0x00800000' + ;; + esac vmz="$tmpdir/`basename \"$kernel\"`.$ext" diff --git a/arch/powerpc/configs/44x/bgp_defconfig b/arch/powerpc/configs/44x/bgp_defconfig new file mode 100644 index 00000000000000..b90cc818cdf64f --- /dev/null +++ b/arch/powerpc/configs/44x/bgp_defconfig @@ -0,0 +1,929 @@ +# +# Automatically generated make config: don't edit +# Linux kernel version: 2.6.29.1 +# Wed May 6 13:09:35 2009 +# +# CONFIG_PPC64 is not set + +# +# Processor support +# +# CONFIG_6xx is not set +# CONFIG_PPC_85xx is not set +# CONFIG_PPC_8xx is not set +# CONFIG_40x is not 
set +CONFIG_44x=y +# CONFIG_E200 is not set +CONFIG_PPC_FPU=y +CONFIG_4xx=y +CONFIG_BOOKE=y +CONFIG_PTE_64BIT=y +CONFIG_PHYS_64BIT=y +CONFIG_PPC_MMU_NOHASH=y +# CONFIG_PPC_MM_SLICES is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=4 +# CONFIG_NOT_COHERENT_CACHE is not set +CONFIG_L1_WRITETHROUGH=y +CONFIG_PPC32=y +CONFIG_WORD_SIZE=32 +CONFIG_ARCH_PHYS_ADDR_T_64BIT=y +CONFIG_MMU=y +CONFIG_GENERIC_CMOS_UPDATE=y +CONFIG_GENERIC_TIME=y +CONFIG_GENERIC_TIME_VSYSCALL=y +CONFIG_GENERIC_CLOCKEVENTS=y +CONFIG_GENERIC_HARDIRQS=y +# CONFIG_HAVE_SETUP_PER_CPU_AREA is not set +CONFIG_IRQ_PER_CPU=y +CONFIG_STACKTRACE_SUPPORT=y +CONFIG_HAVE_LATENCYTOP_SUPPORT=y +CONFIG_LOCKDEP_SUPPORT=y +CONFIG_RWSEM_XCHGADD_ALGORITHM=y +CONFIG_ARCH_HAS_ILOG2_U32=y +CONFIG_GENERIC_HWEIGHT=y +CONFIG_GENERIC_CALIBRATE_DELAY=y +CONFIG_GENERIC_FIND_NEXT_BIT=y +# CONFIG_ARCH_NO_VIRT_TO_BUS is not set +CONFIG_PPC=y +CONFIG_EARLY_PRINTK=y +CONFIG_GENERIC_NVRAM=y +CONFIG_SCHED_OMIT_FRAME_POINTER=y +CONFIG_ARCH_MAY_HAVE_PC_FDC=y +CONFIG_PPC_OF=y +CONFIG_OF=y +CONFIG_PPC_UDBG_16550=y +CONFIG_GENERIC_TBSYNC=y +CONFIG_AUDIT_ARCH=y +CONFIG_GENERIC_BUG=y +# CONFIG_DEFAULT_UIMAGE is not set +CONFIG_PPC_DCR_NATIVE=y +# CONFIG_PPC_DCR_MMIO is not set +CONFIG_PPC_DCR=y +CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" + +# +# General setup +# +CONFIG_EXPERIMENTAL=y +CONFIG_LOCK_KERNEL=y +CONFIG_INIT_ENV_ARG_LIMIT=32 +CONFIG_LOCALVERSION="" +CONFIG_LOCALVERSION_AUTO=y +# CONFIG_SWAP is not set +CONFIG_SYSVIPC=y +CONFIG_SYSVIPC_SYSCTL=y +CONFIG_POSIX_MQUEUE=y +# CONFIG_BSD_PROCESS_ACCT is not set +# CONFIG_TASKSTATS is not set +# CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set +# CONFIG_IKCONFIG is not set +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_GROUP_SCHED=y +CONFIG_FAIR_GROUP_SCHED=y +# CONFIG_RT_GROUP_SCHED is not set +CONFIG_USER_SCHED=y +# 
CONFIG_CGROUP_SCHED is not set +# CONFIG_CGROUPS is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_RELAY=y +# CONFIG_NAMESPACES is not set +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y +CONFIG_EMBEDDED=y +CONFIG_SYSCTL_SYSCALL=y +CONFIG_KALLSYMS=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_HOTPLUG=y +CONFIG_PRINTK=y +CONFIG_BUG=y +CONFIG_ELF_CORE=y +CONFIG_BASE_FULL=y +CONFIG_FUTEX=y +CONFIG_EPOLL=y +CONFIG_SIGNALFD=y +CONFIG_TIMERFD=y +CONFIG_EVENTFD=y +CONFIG_SHMEM=y +CONFIG_AIO=y +CONFIG_VM_EVENT_COUNTERS=y +CONFIG_SLUB_DEBUG=y +CONFIG_COMPAT_BRK=y +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +CONFIG_PROFILING=y +CONFIG_OPROFILE=y +CONFIG_HAVE_OPROFILE=y +# CONFIG_KPROBES is not set +CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y +CONFIG_HAVE_IOREMAP_PROT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_ARCH_TRACEHOOK=y +CONFIG_USE_GENERIC_SMP_HELPERS=y +# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set +CONFIG_SLABINFO=y +CONFIG_RT_MUTEXES=y +CONFIG_BASE_SMALL=0 +CONFIG_MODULES=y +# CONFIG_MODULE_FORCE_LOAD is not set +CONFIG_MODULE_UNLOAD=y +# CONFIG_MODULE_FORCE_UNLOAD is not set +# CONFIG_MODVERSIONS is not set +# CONFIG_MODULE_SRCVERSION_ALL is not set +CONFIG_STOP_MACHINE=y +CONFIG_BLOCK=y +CONFIG_LBD=y +# CONFIG_BLK_DEV_IO_TRACE is not set +# CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set + +# +# IO Schedulers +# +CONFIG_IOSCHED_NOOP=y +CONFIG_IOSCHED_AS=y +CONFIG_IOSCHED_DEADLINE=y +CONFIG_IOSCHED_CFQ=y +CONFIG_DEFAULT_AS=y +# CONFIG_DEFAULT_DEADLINE is not set +# CONFIG_DEFAULT_CFQ is not set +# CONFIG_DEFAULT_NOOP is not set +CONFIG_DEFAULT_IOSCHED="anticipatory" +# CONFIG_FREEZER is not set + +# +# Platform support +# +# CONFIG_PPC_CELL is not set +# CONFIG_PPC_CELL_NATIVE is not set +# CONFIG_PQ2ADS is not set +# CONFIG_BAMBOO is not set +# CONFIG_EBONY is not set +# 
CONFIG_SAM440EP is not set +# CONFIG_SEQUOIA is not set +# CONFIG_TAISHAN is not set +# CONFIG_KATMAI is not set +# CONFIG_RAINIER is not set +# CONFIG_WARP is not set +# CONFIG_CANYONLANDS is not set +# CONFIG_YOSEMITE is not set +CONFIG_BGP=y +# CONFIG_XILINX_VIRTEX440_GENERIC_BOARD is not set +CONFIG_BLUEGENE=y +# CONFIG_BLUEGENE_MAMBO is not set +# CONFIG_BGP_DD1 is not set +CONFIG_BLUEGENE_TCP=y +# CONFIG_BLUEGENE_DMA_MEMCPY is not set +CONFIG_BLUEGENE_COLLECTIVE_TRACE=y +CONFIG_BLUEGENE_TORUS_TRACE=y + +CONFIG_BGP_STATISTICS=y +# CONFIG_BLUEGENE_SHARE_WITH_VRNIC is not set +# CONFIG_BLUEGENE_TCP_WITHOUT_NAPI is not set +# CONFIG_BLUEGENE_UNIPROCESSOR is not set +# CONFIG_BLUEGENE_SOCKETS is not set +CONFIG_HUGE_KMALLOC=y +CONFIG_DEBUG_ALIGNMENT_HISTOGRAM=y +# CONFIG_DEBUG_STACK_USAGE is not set +CONFIG_IBM_OCP=y +CONFIG_IBM_EMAC4=y +# CONFIG_PPC4xx_DMA is not set +CONFIG_PPC_GEN550=y +# CONFIG_IPIC is not set +# CONFIG_MPIC is not set +# CONFIG_MPIC_WEIRD is not set +# CONFIG_PPC_I8259 is not set +# CONFIG_PPC_RTAS is not set +# CONFIG_MMIO_NVRAM is not set +# CONFIG_PPC_MPC106 is not set +# CONFIG_PPC_970_NAP is not set +# CONFIG_PPC_INDIRECT_IO is not set +# CONFIG_GENERIC_IOMAP is not set +# CONFIG_CPU_FREQ is not set +# CONFIG_FSL_ULI1575 is not set +# CONFIG_SIMPLE_GPIO is not set + +# +# Kernel options +# +CONFIG_HIGHMEM=y +# CONFIG_NO_HZ is not set +# CONFIG_HIGH_RES_TIMERS is not set +CONFIG_GENERIC_CLOCKEVENTS_BUILD=y +CONFIG_HZ_100=y +# CONFIG_HZ_250 is not set +# CONFIG_HZ_300 is not set +# CONFIG_HZ_1000 is not set +CONFIG_HZ=100 +# CONFIG_SCHED_HRTICK is not set +CONFIG_PREEMPT_NONE=y +# CONFIG_PREEMPT_VOLUNTARY is not set +# CONFIG_PREEMPT is not set +CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_HAVE_AOUT is not set +# CONFIG_BINFMT_MISC is not set +CONFIG_MATH_EMULATION=y +# CONFIG_IOMMU_HELPER is not set +CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y +CONFIG_ARCH_HAS_WALK_MEMORY=y 
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y +# CONFIG_IRQ_ALL_CPUS is not set +CONFIG_ARCH_FLATMEM_ENABLE=y +CONFIG_ARCH_POPULATES_NODE_MAP=y +CONFIG_SELECT_MEMORY_MODEL=y +CONFIG_FLATMEM_MANUAL=y +# CONFIG_DISCONTIGMEM_MANUAL is not set +# CONFIG_SPARSEMEM_MANUAL is not set +CONFIG_FLATMEM=y +CONFIG_FLAT_NODE_MEM_MAP=y +CONFIG_PAGEFLAGS_EXTENDED=y +CONFIG_SPLIT_PTLOCK_CPUS=4 +CONFIG_MIGRATION=y +CONFIG_PHYS_ADDR_T_64BIT=y +CONFIG_ZONE_DMA_FLAG=1 +CONFIG_BOUNCE=y +CONFIG_VIRT_TO_BUS=y +CONFIG_UNEVICTABLE_LRU=y +# CONFIG_PPC_4K_PAGES is not set +# CONFIG_PPC_16K_PAGES is not set +CONFIG_PPC_64K_PAGES=y +CONFIG_FORCE_MAX_ZONEORDER=11 +CONFIG_PROC_DEVICETREE=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608" +CONFIG_WRAP_COPY_TOFROM_USER=y +CONFIG_EXTRA_TARGETS="" +CONFIG_SECCOMP=y +CONFIG_ISA_DMA_API=y + +# +# Bus options +# +CONFIG_ZONE_DMA=y +CONFIG_4xx_SOC=y +CONFIG_PPC_PCI_CHOICE=y +# CONFIG_PCI is not set +# CONFIG_PCI_DOMAINS is not set +# CONFIG_PCI_SYSCALL is not set +# CONFIG_ARCH_SUPPORTS_MSI is not set +# CONFIG_PCCARD is not set +# CONFIG_HAS_RAPIDIO is not set + +# +# Advanced setup +# +# CONFIG_ADVANCED_OPTIONS is not set + +# +# Default settings for advanced configuration options are used +# +CONFIG_LOWMEM_SIZE=0x30000000 +CONFIG_PAGE_OFFSET=0xc0000000 +CONFIG_KERNEL_START=0xc0000000 +CONFIG_PHYSICAL_START=0x00000000 +CONFIG_TASK_SIZE=0xc0000000 +CONFIG_NET=y + +# +# Networking options +# +CONFIG_COMPAT_NET_DEV_OPS=y +CONFIG_PACKET=y +# CONFIG_PACKET_MMAP is not set +CONFIG_UNIX=y +# CONFIG_NET_KEY is not set +CONFIG_INET=y +# CONFIG_IP_MULTICAST is not set +# CONFIG_IP_ADVANCED_ROUTER is not set +CONFIG_IP_FIB_HASH=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +# CONFIG_IP_PNP_RARP is not set +# CONFIG_NET_IPIP is not set +# CONFIG_NET_IPGRE is not set +# CONFIG_ARPD is not set +# CONFIG_SYN_COOKIES is not set +# CONFIG_INET_AH is not set +# CONFIG_INET_ESP is not set +# 
CONFIG_INET_IPCOMP is not set +# CONFIG_INET_XFRM_TUNNEL is not set +CONFIG_INET_TUNNEL=y +# CONFIG_INET_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET_XFRM_MODE_TUNNEL is not set +# CONFIG_INET_XFRM_MODE_BEET is not set +# CONFIG_INET_LRO is not set +CONFIG_INET_DIAG=y +CONFIG_INET_TCP_DIAG=y +# CONFIG_TCP_CONG_ADVANCED is not set +CONFIG_TCP_CONG_CUBIC=y +CONFIG_DEFAULT_TCP_CONG="cubic" +# CONFIG_TCP_MD5SIG is not set +CONFIG_IPV6=y +# CONFIG_IPV6_PRIVACY is not set +# CONFIG_IPV6_ROUTER_PREF is not set +# CONFIG_IPV6_OPTIMISTIC_DAD is not set +# CONFIG_INET6_AH is not set +# CONFIG_INET6_ESP is not set +# CONFIG_INET6_IPCOMP is not set +# CONFIG_IPV6_MIP6 is not set +# CONFIG_INET6_XFRM_TUNNEL is not set +# CONFIG_INET6_TUNNEL is not set +# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set +# CONFIG_INET6_XFRM_MODE_TUNNEL is not set +# CONFIG_INET6_XFRM_MODE_BEET is not set +# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set +CONFIG_IPV6_SIT=y +CONFIG_IPV6_NDISC_NODETYPE=y +# CONFIG_IPV6_TUNNEL is not set +# CONFIG_IPV6_MULTIPLE_TABLES is not set +# CONFIG_IPV6_MROUTE is not set +# CONFIG_NETWORK_SECMARK is not set +# CONFIG_NETFILTER is not set +# CONFIG_IP_DCCP is not set +# CONFIG_IP_SCTP is not set +# CONFIG_TIPC is not set +# CONFIG_ATM is not set +# CONFIG_BRIDGE is not set +# CONFIG_NET_DSA is not set +# CONFIG_VLAN_8021Q is not set +# CONFIG_DECNET is not set +# CONFIG_LLC2 is not set +# CONFIG_IPX is not set +# CONFIG_ATALK is not set +# CONFIG_X25 is not set +# CONFIG_LAPB is not set +# CONFIG_ECONET is not set +# CONFIG_WAN_ROUTER is not set +# CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set + +# +# Network testing +# +# CONFIG_NET_PKTGEN is not set +# CONFIG_HAMRADIO is not set +# CONFIG_CAN is not set +# CONFIG_IRDA is not set +# CONFIG_BT is not set +# CONFIG_AF_RXRPC is not set +# CONFIG_PHONET is not set +# CONFIG_WIRELESS is not set +# CONFIG_WIMAX is not set +# CONFIG_RFKILL is not set +# CONFIG_NET_9P is not set + +# +# Device Drivers +# + +# 
+# Generic Driver Options +# +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_STANDALONE=y +CONFIG_PREVENT_FIRMWARE_BUILD=y +CONFIG_FW_LOADER=y +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" +# CONFIG_DEBUG_DRIVER is not set +# CONFIG_DEBUG_DEVRES is not set +# CONFIG_SYS_HYPERVISOR is not set +CONFIG_CONNECTOR=y +CONFIG_PROC_EVENTS=y +# CONFIG_MTD is not set +CONFIG_OF_DEVICE=y +# CONFIG_PARPORT is not set +CONFIG_BLK_DEV=y +# CONFIG_BLK_DEV_FD is not set +# CONFIG_BLK_DEV_COW_COMMON is not set +# CONFIG_BLK_DEV_LOOP is not set +# CONFIG_BLK_DEV_NBD is not set +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_COUNT=16 +CONFIG_BLK_DEV_RAM_SIZE=35000 +# CONFIG_BLK_DEV_XIP is not set +# CONFIG_CDROM_PKTCDVD is not set +# CONFIG_ATA_OVER_ETH is not set +# CONFIG_XILINX_SYSACE is not set +# CONFIG_BLK_DEV_HD is not set +CONFIG_MISC_DEVICES=y +# CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_93CX6 is not set +CONFIG_HAVE_IDE=y +# CONFIG_IDE is not set + +# +# SCSI device support +# +# CONFIG_RAID_ATTRS is not set +# CONFIG_SCSI is not set +# CONFIG_SCSI_DMA is not set +# CONFIG_SCSI_NETLINK is not set +# CONFIG_ATA is not set +# CONFIG_MD is not set +# CONFIG_MACINTOSH_DRIVERS is not set +CONFIG_NETDEVICES=y +# CONFIG_DUMMY is not set +# CONFIG_BONDING is not set +# CONFIG_MACVLAN is not set +# CONFIG_EQUALIZER is not set +# CONFIG_TUN is not set +# CONFIG_VETH is not set +# CONFIG_NET_ETHERNET is not set +# CONFIG_NETDEV_1000 is not set +CONFIG_NETDEV_10000=y +CONFIG_BGP_COLLECTIVE=y +CONFIG_BGP_COLLECTIVE_IP_CHECKSUM=y +CONFIG_BGP_COLLECTIVE_NAPI=n +CONFIG_BGP_DMA=y +CONFIG_BGP_TORUS=y +CONFIG_BGP_TORUS_DIAGNOSTICS=y +# CONFIG_BGP_FRANKENTORUS is not set +CONFIG_BGP_TORUS_IP_CHECKSUM=y +CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT=20 +CONFIG_BGP_TORUS_ADAPTIVE_ROUTING=y + +CONFIG_BGP_VRNIC=n +CONFIG_BGP_E10000=y +CONFIG_BGP_E10000_RXB=1048576 +CONFIG_BGP_E10000_TXB=4096 +CONFIG_BGP_E10000_IP_CHECKSUM=y 
+CONFIG_BGP_E10000_NAPI=y +# CONFIG_BGP_E10000_EMAC_LOOPBACK is not set +# CONFIG_BGP_E10000_PHY_LOOPBACK is not set +# CONFIG_BGP_E10000_DBG is not set + +# +# Wireless LAN +# +# CONFIG_WLAN_PRE80211 is not set +# CONFIG_WLAN_80211 is not set +# CONFIG_IWLWIFI_LEDS is not set + +# +# Enable WiMAX (Networking options) to see the WiMAX drivers +# +# CONFIG_WAN is not set +# CONFIG_PPP is not set +# CONFIG_SLIP is not set +# CONFIG_NETCONSOLE is not set +# CONFIG_NETPOLL is not set +# CONFIG_NET_POLL_CONTROLLER is not set + +CONFIG_TCP_HIATUS_COUNTS=y +CONFIG_TCP_CONGESTION_OVERRIDES=y + +# CONFIG_ISDN is not set +# CONFIG_PHONE is not set + +# +# Input device support +# +# CONFIG_INPUT is not set + +# +# Hardware I/O ports +# +# CONFIG_SERIO is not set +# CONFIG_GAMEPORT is not set + +# +# Character devices +# +# CONFIG_VT is not set +CONFIG_DEVKMEM=y +# CONFIG_SERIAL_NONSTANDARD is not set + +# +# Serial drivers +# +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=4 +CONFIG_SERIAL_8250_RUNTIME_UARTS=4 +CONFIG_SERIAL_8250_EXTENDED=y +# CONFIG_SERIAL_8250_MANY_PORTS is not set +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_SERIAL_8250_DETECT_IRQ is not set +# CONFIG_SERIAL_8250_RSA is not set + +# +# Non-8250 serial port support +# +# CONFIG_SERIAL_UARTLITE is not set +CONFIG_SERIAL_CORE=y +CONFIG_SERIAL_CORE_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +# CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL is not set +CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_HVC_UDBG is not set +# CONFIG_IPMI_HANDLER is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_NVRAM is not set +# CONFIG_GEN_RTC is not set +# CONFIG_R3964 is not set +# CONFIG_RAW_DRIVER is not set +# CONFIG_TCG_TPM is not set +# CONFIG_I2C is not set +# CONFIG_SPI is not set +CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y +# CONFIG_GPIOLIB is not set +# CONFIG_W1 is not set +# CONFIG_POWER_SUPPLY is not set +# CONFIG_HWMON is not set 
+CONFIG_THERMAL=y +# CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y + +# +# Sonics Silicon Backplane +# +# CONFIG_SSB is not set + +# +# Multifunction device drivers +# +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_SM501 is not set +# CONFIG_HTC_PASIC3 is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_REGULATOR is not set + +# +# Multimedia devices +# + +# +# Multimedia core support +# +# CONFIG_VIDEO_DEV is not set +# CONFIG_DVB_CORE is not set +# CONFIG_VIDEO_MEDIA is not set + +# +# Multimedia drivers +# +# CONFIG_DAB is not set + +# +# Graphics support +# +# CONFIG_VGASTATE is not set +# CONFIG_VIDEO_OUTPUT_CONTROL is not set +# CONFIG_FB is not set +# CONFIG_BACKLIGHT_LCD_SUPPORT is not set + +# +# Display device support +# +# CONFIG_DISPLAY_SUPPORT is not set +# CONFIG_SOUND is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_MMC is not set +# CONFIG_MEMSTICK is not set +# CONFIG_NEW_LEDS is not set +# CONFIG_ACCESSIBILITY is not set + +# +# InfiniBand support +# +CONFIG_INFINIBAND=y +CONFIG_INFINIBAND_USER_ACCESS=m +CONFIG_INFINIBAND_USER_MEM=y +CONFIG_INFINIBAND_ADDR_TRANS=y +CONFIG_INFINIBAND_SOFTRDMA=m +CONFIG_INFINIBAND_SOFTIWARP=m +# CONFIG_INFINIBAND_BGVRNIC is not set +# CONFIG_INFINIBAND_BGVRNIC_ETH is not set +CONFIG_INFINIBAND_IPOIB=m +# CONFIG_INFINIBAND_IPOIB_CM is not set +CONFIG_INFINIBAND_IPOIB_DEBUG=y +CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y +# CONFIG_EDAC is not set +# CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_UIO is not set +# CONFIG_STAGING is not set + +# +# File systems +# +CONFIG_EXT2_FS=y +# CONFIG_EXT2_FS_XATTR is not set +# CONFIG_EXT2_FS_XIP is not set +# CONFIG_EXT3_FS is not set +# CONFIG_EXT4_FS is not set +# CONFIG_REISERFS_FS is not set +# CONFIG_JFS_FS is not set +# CONFIG_FS_POSIX_ACL is not set +CONFIG_FILE_LOCKING=y +# CONFIG_XFS_FS is not set +# CONFIG_GFS2_FS is not set +# CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +CONFIG_DNOTIFY=y +CONFIG_INOTIFY=y +CONFIG_INOTIFY_USER=y +# 
CONFIG_QUOTA is not set +# CONFIG_AUTOFS_FS is not set +CONFIG_AUTOFS4_FS=y +# CONFIG_FUSE_FS is not set + +# +# CD-ROM/DVD Filesystems +# +# CONFIG_ISO9660_FS is not set +# CONFIG_UDF_FS is not set + +# +# DOS/FAT/NT Filesystems +# +# CONFIG_MSDOS_FS is not set +# CONFIG_VFAT_FS is not set +# CONFIG_NTFS_FS is not set + +# +# Pseudo filesystems +# +CONFIG_PROC_FS=y +CONFIG_PROC_KCORE=y +CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y +CONFIG_SYSFS=y +CONFIG_TMPFS=y +# CONFIG_TMPFS_POSIX_ACL is not set +CONFIG_HUGETLBFS=n +CONFIG_HUGETLB_PAGE=n +# CONFIG_CONFIGFS_FS is not set +CONFIG_MISC_FILESYSTEMS=y +# CONFIG_ADFS_FS is not set +# CONFIG_AFFS_FS is not set +# CONFIG_HFS_FS is not set +# CONFIG_HFSPLUS_FS is not set +# CONFIG_BEFS_FS is not set +# CONFIG_BFS_FS is not set +# CONFIG_EFS_FS is not set +CONFIG_CRAMFS=y +# CONFIG_SQUASHFS is not set +# CONFIG_VXFS_FS is not set +# CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set +# CONFIG_HPFS_FS is not set +# CONFIG_QNX4FS_FS is not set +# CONFIG_ROMFS_FS is not set +# CONFIG_SYSV_FS is not set +# CONFIG_UFS_FS is not set +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +# CONFIG_NFS_V3_ACL is not set +CONFIG_NFS_V4=y +CONFIG_ROOT_NFS=y +CONFIG_NFSD=y +CONFIG_NFSD_V3=y +# CONFIG_NFSD_V3_ACL is not set +# CONFIG_NFSD_V4 is not set +CONFIG_LOCKD=y +CONFIG_LOCKD_V4=y +CONFIG_NFS_COMMON=y +CONFIG_SUNRPC=y +CONFIG_SUNRPC_GSS=y +# CONFIG_SUNRPC_REGISTER_V4 is not set +CONFIG_RPCSEC_GSS_KRB5=y +# CONFIG_RPCSEC_GSS_SPKM3 is not set +# CONFIG_SMB_FS is not set +# CONFIG_CIFS is not set +# CONFIG_NCP_FS is not set +# CONFIG_CODA_FS is not set +# CONFIG_AFS_FS is not set + +# +# Partition Types +# +# CONFIG_PARTITION_ADVANCED is not set +CONFIG_MSDOS_PARTITION=y +# CONFIG_NLS is not set +# CONFIG_DLM is not set + +# +# Library routines +# +CONFIG_BITREVERSE=y +CONFIG_GENERIC_FIND_LAST_BIT=y +# CONFIG_CRC_CCITT is not set +# CONFIG_CRC16 is not set +# CONFIG_CRC_T10DIF is not set +# CONFIG_CRC_ITU_T is 
not set +CONFIG_CRC32=y +# CONFIG_CRC7 is not set +# CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_PLIST=y +CONFIG_HAS_IOMEM=y +CONFIG_HAS_IOPORT=y +CONFIG_HAS_DMA=y +CONFIG_HAVE_LMB=y + +# +# Kernel hacking +# +CONFIG_PRINTK_TIME=y +CONFIG_ENABLE_WARN_DEPRECATED=y +CONFIG_ENABLE_MUST_CHECK=y +CONFIG_FRAME_WARN=1024 +CONFIG_MAGIC_SYSRQ=y +# CONFIG_UNUSED_SYMBOLS is not set +CONFIG_DEBUG_FS=y +# CONFIG_HEADERS_CHECK is not set +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_KERNEL=y +# CONFIG_DEBUG_SHIRQ is not set +CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +# CONFIG_SCHED_DEBUG is not set +# CONFIG_SCHEDSTATS is not set +# CONFIG_TIMER_STATS is not set +# CONFIG_DEBUG_OBJECTS is not set +# CONFIG_SLUB_DEBUG_ON is not set +# CONFIG_SLUB_STATS is not set +# CONFIG_DEBUG_RT_MUTEXES is not set +# CONFIG_RT_MUTEX_TESTER is not set +# CONFIG_DEBUG_SPINLOCK is not set +# CONFIG_DEBUG_MUTEXES is not set +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +# CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set +CONFIG_DEBUG_BUGVERBOSE=y +# CONFIG_DEBUG_INFO is not set +# CONFIG_DEBUG_VM is not set +# CONFIG_DEBUG_WRITECOUNT is not set +# CONFIG_DEBUG_MEMORY_INIT is not set +# CONFIG_DEBUG_LIST is not set +# CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_BOOT_PRINTK_DELAY is not set +# CONFIG_RCU_TORTURE_TEST is not set +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +# CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_FAULT_INJECTION is not set +# CONFIG_LATENCYTOP is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y + +# +# Tracers +# +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_CONTEXT_SWITCH_TRACER is not set +# CONFIG_BOOT_TRACER is not set +# 
CONFIG_TRACE_BRANCH_PROFILING is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_DYNAMIC_PRINTK_DEBUG is not set +# CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +CONFIG_PRINT_STACK_DEPTH=64 +# CONFIG_DEBUG_STACKOVERFLOW is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_CODE_PATCHING_SELFTEST is not set +# CONFIG_FTR_FIXUP_SELFTEST is not set +# CONFIG_MSI_BITMAP_SELFTEST is not set +# CONFIG_XMON is not set +# CONFIG_IRQSTACKS is not set +# CONFIG_VIRQ_DEBUG is not set +# CONFIG_BDI_SWITCH is not set +# CONFIG_PPC_EARLY_DEBUG is not set + +# +# Security options +# +# CONFIG_KEYS is not set +# CONFIG_SECURITY is not set +# CONFIG_SECURITYFS is not set +# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_CRYPTO=y + +# +# Crypto core or helper +# +# CONFIG_CRYPTO_FIPS is not set +CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y +CONFIG_CRYPTO_AEAD2=y +CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y +CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y +# CONFIG_CRYPTO_GF128MUL is not set +# CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_CRYPTD is not set +# CONFIG_CRYPTO_AUTHENC is not set +# CONFIG_CRYPTO_TEST is not set + +# +# Authenticated Encryption with Associated Data +# +# CONFIG_CRYPTO_CCM is not set +# CONFIG_CRYPTO_GCM is not set +# CONFIG_CRYPTO_SEQIV is not set + +# +# Block modes +# +CONFIG_CRYPTO_CBC=y +# CONFIG_CRYPTO_CTR is not set +# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_ECB=y +# CONFIG_CRYPTO_LRW is not set +CONFIG_CRYPTO_PCBC=y +# CONFIG_CRYPTO_XTS is not set + +# +# Hash modes +# +# CONFIG_CRYPTO_HMAC is not set +# CONFIG_CRYPTO_XCBC is not set + +# +# Digest +# +# CONFIG_CRYPTO_CRC32C is not set +# CONFIG_CRYPTO_MD4 is not set +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_MICHAEL_MIC is not set +# CONFIG_CRYPTO_RMD128 is not set +# CONFIG_CRYPTO_RMD160 is not set +# CONFIG_CRYPTO_RMD256 is not set +# CONFIG_CRYPTO_RMD320 is not set +# 
CONFIG_CRYPTO_SHA1 is not set +# CONFIG_CRYPTO_SHA256 is not set +# CONFIG_CRYPTO_SHA512 is not set +# CONFIG_CRYPTO_TGR192 is not set +# CONFIG_CRYPTO_WP512 is not set + +# +# Ciphers +# +# CONFIG_CRYPTO_AES is not set +# CONFIG_CRYPTO_ANUBIS is not set +# CONFIG_CRYPTO_ARC4 is not set +# CONFIG_CRYPTO_BLOWFISH is not set +# CONFIG_CRYPTO_CAMELLIA is not set +# CONFIG_CRYPTO_CAST5 is not set +# CONFIG_CRYPTO_CAST6 is not set +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_FCRYPT is not set +# CONFIG_CRYPTO_KHAZAD is not set +# CONFIG_CRYPTO_SALSA20 is not set +# CONFIG_CRYPTO_SEED is not set +# CONFIG_CRYPTO_SERPENT is not set +# CONFIG_CRYPTO_TEA is not set +# CONFIG_CRYPTO_TWOFISH is not set + +# +# Compression +# +# CONFIG_CRYPTO_DEFLATE is not set +# CONFIG_CRYPTO_LZO is not set + +# +# Random Number Generation +# +# CONFIG_CRYPTO_ANSI_CPRNG is not set +# CONFIG_CRYPTO_HW is not set +# CONFIG_PPC_CLOCK is not set +# CONFIG_VIRTUALIZATION is not set diff --git a/arch/powerpc/include/asm/bgcns.h b/arch/powerpc/include/asm/bgcns.h new file mode 100644 index 00000000000000..238ad401a3cbfb --- /dev/null +++ b/arch/powerpc/include/asm/bgcns.h @@ -0,0 +1,1060 @@ +/* + * (C) Copyright IBM Corp. 2007, 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Tom Gooding, IBM + */ + + +#ifndef _BGCNS_H +#define _BGCNS_H + + +#ifndef __ASSEMBLY__ + +/*! 
@page CNS Common Node Services + * + * @section CNS_S10 Overview + * + * As the name implies, the <b>Common Node Services (CNS)</b> layer provides @b services + * to the kernel. These services may be simple queries abstracting various node + * specific data (such as DDR size) or they may be more sophisticated software services, + * such as common machine check handling. Additionally, some services may be implicit, + * such as the initialization of various hardware devices unique to Blue Gene, such as + * Netbus and SerDes. + * + * Services are not directly linked into the kernel, but rather are invoked from kernel + * code via a <b>service directory</b> which is itself part of an overall <b>service + * descriptor</b>. This service descriptor is constructed during initialization and + * is passed to the kernel when the kernel is booted. The service directory is a + * collection of <b>service references</b>. + * + * During partition (block) booting, ELF images are loaded onto the compute and I/O nodes. + * The bootloader (@i aka microloader) boots first and then transfers control to the Common + * Node Services layer so that it, in turn, may boot. + * + * Once the CNS layer has booted, control is transferred to the kernel so that it may also + * boot. All services provided by the CNS layer are immediately available at this time. + * + * @section CNS_S20 Programming Model + * + * A kernel running on top of the CNS layer is not statically linked to the common services. + * Instead, the services are called via function pointers provided by the services descriptor, + * which is described here: @ref _BGCNS_ServiceDirectory. + * + * The kernel must operate under the following rules and restrictions: + * @li The kernel must not alter the services descriptor. The descriptor must be treated as a read-only + * data structure even though the kernel may have the ability to alter it. 
Because CNS trusts the + * kernel, this also implies that the kernel must not expose the descriptor to any untrusted + * software (such as application code). + * @li The kernel must ensure that the CNS virtual memory region is mapped prior to invoking any + * service. + * @li The kernel must ensure that any data passed to services via parameters is mapped. + * Specifically, TLB entries must be mapped as shared (TID = 0) and must be either readable + * (input parameters) or readable and writeable (output parameters). + * @li The kernel must treat the virtual address range (@ref _BGCNS_Descriptor::baseVirtualAddress , + * _BGCNS_Descriptor::baseVirtualAddress + @ref _BGCNS_Descriptor::size - 1) as reserved. + * That is, the kernel must not use this region of virtual memory for anything besides accessing + * the services descriptor. + * @li The kernel must treat the physical address range (@ref _BGCNS_Descriptor::basePhysicalAddress, + * _BGCNS_Descriptor::basePhysicalAddress + _BGCNS_Descriptor::size - 1) as reserved. The + * kernel must not map this memory for any other use. + * @li The kernel must not access any of the reserved virtual address regions with TLB settings that + * are different from those used by CNS. The kernel is allowed to unmap any of the reserved + * memory TLBs for its own use. However, in such a case and per the rule above, the kernel must + * ensure that the region is mapped prior to using any CNS facilities (such as invoking a service). + * @li CNS may need to map one or more TLB entries in order to access Blue Gene devices. In such a case, + * CNS may borrow TLB entries; the TLB will be returned to its original state before the service returns + * control to the invoking kernel. Kernels may avoid this behavior for specific devices by using + * the mapDevice service. + * @li The kernel's ELF image must avoid the 256K region of memory between 0x07000000 and 0x0703FFFF. 
This + * region is used for the pre-relocated CNS image and will be available for general use once CNS boot + * is complete. + * @li The kernel must not alter any reserved SPRs, DCRs or memory-mapped device registers. + * + * The CNS software may behave unpredictably if any of these rules and restrictions is violated. + * + * Kernels may make the following assumptions about CNS: + * + * @li The data passed in the firmware descriptor (see below) is static. Specifically, the base addresses, + * size and service directory will not change once CNS boot is complete. + * + * @subsection CNS_21 Programming Examples + * + * @subsubsection CNS_211 Obtaining the Personality + * + * The following example shows how to fetch a copy of the Blue Gene personality structure and also + * serves as a simple example of invoking a service: + * + * @code + * + * BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time + * _BGP_Personality_t* pers = (_BGP_Personality_t*)(*descr->services->getPersonalityData)(); + * ... + * @endcode + * + * The programming model guarantees that the descriptor is static. Thus, one can provide a + * convenience method to make service invocation a little more readable + * + * @code + * + * + * static BGCNS_Descriptor* _cns_descriptor = ...; // obtained from CNS at boot time + * + * inline BGCNS_ServiceDirectory* cns() { return _cns_descriptor->services; } + * + * void foo() { + * _BGP_Personality_t* pers = (_BGP_Personality_t*)cns()->getPersonalityData(); + * ... + * } + * + * @endcode + * + * This style will be used in all of the subsequent examples. + * + * @subsubsection CNS_212 SMP Initialization + * + * Common Node Services will launch the kernel on a single core (typically core 0) and will + * leave the remaining cores parked. The kernel can activate additional cores via the @c takeCPU + * service. 
Here is a very simple example of such kernel code: + * + * @code + * + * void anEntryPoint(unsigned core, void* arg_not_used) { + * // Do whatever your kernel needs to do here. Typically, + * // this function never returns. You will arrive here + * // when takeCPU is invoked (below). + * } + * + * void someCodeOnTheMainThread() { + * + * // ... + * + * unsigned N = cns()->getNumberOfCores(); + * + * for (core = 1; core < N; core++) { + * if ( cns()->takeCPU(core, NULL, &anEntryPoint) != 0 ) { + * // error handling goes here + * } + * } + * + * // ... + * } + * + * @endcode + * + * @subsubsection CNS_213 Version Compatibility + * + * Common Node Services structures and APIs should remain compatible within maintenance + * releases and e-fixes. Kernel's may add a runtime check to ensure that the version + * of CNS is compatible with the version from compile time. This is done as follows: + * + * @code + * + * BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time + * + * if ( ! BGCNS_IS_COMPATIBLE(descr) ) { + * // incompatible CNS (panic?) + * } + * + * @endcode + * + * @subsubsection CNS_23 Interrupts + * + * A kernel wanting to use the CNS interrupt services would first have to enable interrupts + * for the appropriate Blue Gene BIC group and IRQ within that group. This would likely be + * done at boot time. So, for example, such a kernel could enable interrupts for the Universal + * Performance Counter (group 5, IRQ 2) to be handled by the non-critical handler of core 0 as + * follows: + * + * @code + * cns()->enableInterrupt(5, 2, BGCNS_NonCritical, 0); + * @endcode + * + * Such a kernel might also maintain a collection of routines that act as subhandlers of the + * non-critical interrupt handler. 
In this example, we'll assume it is simply a two + * dimensional array indexed by group and IRQ: + * + * @code + * subhandlers[5][2] = &theUpcSubHandler; + * @endcode + * + * That kernel's non-critical interrupt handler would then typically handle all interrupts by + * successively invoking the getInterrupt() service to determine the group and IRQ, and then + * dispatching the appropriate subhandler. Additionally, the interrupt will be acknowledged + * so to avoid continuous interruption: + * + * @code + * unsigned grp, irq; + * + * while ( cns()->getInterrupt(BGCNS_NonCritical, &grp, &irq) == 0) { + * (*subhandlers[grp][irq])(); // dispatch the handler + * cns()->acknowledgeInterrupt(grp,irq); // ack the interrupt + * } + * @endcode + * + * @subsubsection CNS_24 Global Barriers and Interrupts + * + * The Blue Gene/P Global Interrupt Controller (aka GLINT) provides 4 independent channels + * that may be configured as either a global barrier or a global interrupt. + * + * Barriers are constructed by invoking the barrier service: + * + * @code + * unsigned channel = 0; + * + * // synchronize: + * int reset = 1; + * int rc; + * while ( (rc = cns()->globalBarrier_nonBlocking(channel, reset, 1000)) == BGCNS_RC_CONTINUE ) { + * reset = 0; + * } + * + * if ( rc == BGCNS_RC_COMPLETE ) { + * // good path + * } + * else { + * // error + * } + * @endcode + * + * Similarly, a barrier with a timeout can also be constructed: + * + * @code + * unsigned channel = 0; + * int reset = 1; + * unsigned long long startTime = ...; // obtain current time + * int rc; + * + * while ( (rc = cns()->globalBarrier_nonBlocking(channel,reset, 1000)) == BGCNS_RC_CONTINUE ) { + * reset = 0; + * unsigned long long currentTime = ...; // obtain current time + * if ( currentTime - startTime > timeout ) + * break; + * } + * + * if ( rc == BGCNS_RC_COMPLETE ) { + * // good path + * } + * else { + * // timeout or error + * } + * @endcode + * + * A node may opt out of a barrier channel via the disableBarrier service: 
+ * + * @code + * + * // some other synchronization mechanism needs to go here + * + * cns()->disableBarrier(channel); + * + * @endcode + * + * Conversely, it may opt back in: + * + * @code + * cns()->enableBarrier(channel, user_mode); + * @endcode + * + * By default, CNS reserves the use of channel 2 as a global interrupt for environmental + * monitoring. It also reserves channel 3 for use as a supervisory mode, compute-node + * only barrier. Compute node kernels are free to share this channel for the same + * purpose (compute node, supervisory barrier). The enable/disable barrier services + * may return errors if operating on a reserved channel. + * + * NOTE: The standard BG/P software stack, which includes I/O node Linux and Compute Node + * Kernel (CNK) uses channel 0 as an I/O node barrier during boot and transforms it to an + * compute-node only barrier when jobs execute. + * + * + * @section CNS_3 DMA Services + * + * The DMA services provided in CNS are low-level services. Interested readers of this area should + * also look at the documentation for the DMA SPIs, which are at a slightly higher level. + * + * + * + * @section CNS_4 Reserved and Preferred Addresses + * + * + * The following virtual memory regions are reserved and must be avoided by + * kernels: + * + * @code + * + * +------------+------------+------+----------------------+-----------------------+ + * | Lower | Upper | Size | Usage | Attributes | + * +------------+------------+------+----------------------+-----------------------+ + * | CNSlow[1] | CNShigh[2] | 256K | CNS | I, Rs, Ws, Xs | + * +------------+------------+------+----------------------+-----------------------+ + * + * [1] CNSlow = descr->baseVirtualAddress , usually 0xFFF40000 + * [2] CNShigh = descr->baseVirtualAddress + descr->size - 1; usually 0xFFF7FFFF + * + * @endcode + * + * The following virtual memory regions are used by default in CNS. 
Kernels that wish to have + * a different memory map may do so via the mapDevice service. + * + * @code + * +------------+------------+------+----------------------+-----------------------+ + * | Lower | Upper | Size | Usage | Attributes | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFB0000 | 0xFFFCFFFF | 64K | Torus | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFD0000 | 0xFFFD3FFF | 16K | DMA | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFD9000 | 0xFFFD9FFF | 4K | DevBus | I, G, Rs, Ws | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFDA000 | 0xFFFDAFFF | 4K | UPC | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFDC000 | 0xFFFDD3FF | 4K | Collective | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFDE000 | 0xFFFDEFFF | 4K | BIC | I, G, Rs, Ws, Xs | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFF0000 | 0xFFFF3FFF | 16K | Lockbox (supervisor) | I, G, Rs, Ws | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFF4000 | 0xFFFF7FFF | 16K | Lockbox (user) | I, G, Rs, Ws, Ru, Wu | + * +------------+------------+------+----------------------+-----------------------+ + * | 0xFFFF8000 | 0xFFFFFFFF | 32K | SRAM | SWOA, WL1, Rs, Ws, Xs | + * +------------+------------+------+----------------------+-----------------------+ + * @endcode + * + */ + + +#define BGCNS_VERSION 0x01030000 /* V1R3M0 efix 0 */ +#define BGCNS_IS_COMPATIBLE(descr) ( ((descr)->version & 0xFFFF0000) == (BGCNS_VERSION & 0xFFFF0000) ) //!< True iff the given descriptor is compatible with this version of CNS + +/* ! 
@enum BGCNS_InterruptType */ +/* ! @brief Defines the different types of interrupts known to */ +/* ! Common Node Services. */ +typedef enum { + BGCNS_NonCritical, //!< Non-critical interrupt + BGCNS_Critical, //!< Critical interrupt + BGCNS_MachineCheck, //!< Machine check +} BGCNS_InterruptType; + +/* ! @enum BGCNS_FifoOperation */ +/* ! @brief Defines the types of FIFO operations */ +/* ! @see _BGCNS_ServiceDirectory::setDmaFifoControls */ +/* ! @see _BGCNS_ServiceDirectory::setDmaLocalCopies */ +/* ! @see _BGCNS_ServiceDirectory::setDmaPriority */ +typedef enum { + BGCNS_Disable = 0, + BGCNS_Enable = 1, + BGCNS_Reenable = 2 +} BGCNS_FifoOperation; + +/* ! @enum BGCNS_FifoFacility */ +/* ! @brief Defines the various types of FIFO facilities */ +typedef enum { + BGCNS_InjectionFifo, //!< Normal Injection FIFO + BGCNS_ReceptionFifo, //!< Normal Reception FIFO + BGCNS_ReceptionHeaderFifo, //!< Reception Header FIFO (typically used only for debugging) + BGCNS_InjectionFifoInterrupt, + BGCNS_ReceptionFifoInterrupt, + BGCNS_ReceptionHeaderFifoInterrupt, + BGCNS_InjectionCounterInterrupt, + BGCNS_ReceptionCounterInterrupt +} BGCNS_FifoFacility; + +/* ! @enum BGCNS_LinkType */ +/* ! @brief Defines the types of MAC links. */ +/* ! @see _BGCNS_ServiceDirectory::macTestLink */ +typedef enum { + BGCNS_Transmitter, //!< A transmitter link. + BGCNS_Receiver //!< A receiver link. +} BGCNS_LinkType; + +/* ! @enum BGCNS_EnvmonParameter */ +/* ! @brief Enumerates the various environmental monitor parameters. */ +/* ! @see _BGCNS_ServiceDirectory::getEnvmonParm */ +/* ! @see _BGCNS_ServiceDirectory::setEnvmonParm */ +typedef enum { + BGCNS_envmon_period = 0, + BGCNS_envmon_policy, + BGCNS_envmon_globintwire, + + /* temporary */ + BGCNS_envmon_duration, + BGCNS_envmon_ddrratio, + BGCNS_envmon_numparms +} BGCNS_EnvmonParameter; + + +#define BGCNS_RC_COMPLETE 0 //!< Indicates that the operation completed normally. 
+#define BGCNS_RC_CONTINUE 1 //!< Indicates that the operation is still in progress. +#define BGCNS_RC_TIMEOUT -1 //!< Indicates that the operation timed out. +#define BGCNS_RC_ERROR -2 //!< Indicates that the operation failed. + +#define BGCNS_NUM_DMA_RECEPTION_GROUPS 4 +#define BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP 8 + +/* ! @brief Describes the mapping of physical torus reception FIFOs to DMA reception FIFOs (rmFIFOs). */ +/* ! The first dimension indexes DMA reception groups, which are a combination of PID0 and PID1 bits */ +/* ! from the DMA packet. */ +/* ! */ +/* ! The second dimension indexes through the different dimensions: X+, X-, Y+, Y-, Z+, Z-, high priority */ +/* ! and local copy. */ +typedef unsigned char BGCNS_ReceptionMap[BGCNS_NUM_DMA_RECEPTION_GROUPS][BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP]; + +/* ! @brief Indicates that an interrupt is to be broadcast on all cores. */ +/* ! @see _BGCNS_ServiceDirectory::enableInterrupt */ +#define BGCNS_ALL_CORE_BROADCAST 0xFFFFFFFFu + + +/* ! @enum BGCNS_DeviceMasks */ +/* ! @brief Provides a list of masks for various Blue Gene devices */ + +typedef enum { + BGCNS_SRAM = 0x80000000u, + BGCNS_BIC = 0x40000000u, + BGCNS_Torus = 0x20000000u, + BGCNS_DevBus = 0x10000000u, + BGCNS_XEMAC = 0x08000000u, + BGCNS_LockBox = 0x04000000u, + BGCNS_Collective = 0x02000000u, + BGCNS_SRAM_Err = 0x01000000u, + BGCNS_DMA = 0x00800000u, + BGCNS_UPC = 0x00400000u +} BGCNS_DeviceMasks; + +/* ! @typedef BGCNS_ServiceDirectory */ +/* ! @struct _BGCNS_ServiceDirectory */ +/* ! @brief The service directory is a collection of function pointers to services */ +/* ! provided by the Common Node Services. */ +typedef struct _BGCNS_ServiceDirectory { + + /*------------------------------------------*/ + /*--- Informational services for the node --*/ + /*------------------------------------------*/ + + + int (*isIONode)(void); //!< Returns 1 if this is an I/O node; 0 if not. 
+ + + /*-----------------------------------------------------------------*/ + /*--- Informational services for obtaining Raw personality data ---*/ + /*-----------------------------------------------------------------*/ + + unsigned int (*getPersonalitySize)(void); //!< Returns the size (in bytes) of the Blue Gene personality. + void* (*getPersonalityData)(void); //!< Returns a pointer to the raw personality data. + + + /*-----------------------------------------------*/ + /*--- Services for Symmetric Multi-Processing ---*/ + /*-----------------------------------------------*/ + + + unsigned (*getNumberOfCores)(void); //!< Returns the number of CPUs on this node. + + /* ! @brief Called by the kernel to activate a CPU. */ + /* ! @param[in] cpu The index of the cpu (core) to be activated. */ + /* ! @param[in] entry The (kernel) entry point function. This function will be invoked when */ + /* ! the CPU is actually activated. */ + /* ! @param[in] arg A pointer to the lone argument to be passed to the entry point. */ + /* ! @return Zero (0) if the CPU was successfully activated. Non-zero if the CPU was not */ + /* ! activated (e.g. invalid cpu argument, or the cpu has already been */ + /* ! activated). */ + /* ! @remarks See Section x of the Common Node Services overview for details. */ + int (*takeCPU)(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg)); + + + /*--------------------------------------*/ + /*--- Services for Blue Gene devices ---*/ + /*--------------------------------------*/ + + /* ! @brief Checks active devices for a clean termination state and returns 0 */ + /* ! if everything is nominal. Returns non-zero if any anomaly is */ + /* ! detected and logs violations. */ + /* ! @param[in] job_rc specifies the return code of the job that is terminating. */ + int (*terminationCheck)(int job_rc); + + /*-------------------------------*/ + /*--- Services for interrupts ---*/ + /*-------------------------------*/ + + + /* ! 
@brief Enables the specified interrupt. For all interrupts except inter-processor */ + /* ! interrupts, the interrupt will be handled by the specified core. */ + /* ! @param[in] group Specifies the Blue Gene interrupt group */ + /* ! @param[in] irq Specifies the interrupt index within the group */ + /* ! @param[in] itype Specifies the type of interrupt that hardware will present */ + /* ! for this group/irq. */ + /* ! @param[in] core Specifies which core will handle the interrupt. If specified as */ + /* ! BGCNS_ALL_CORE_BROADCAST, then all cores will handle the interrupt. */ + /* ! @return Returns zero (0) if the interrupt is enabled and returns non-zero if it was not */ + /* ! (including the case of bad arguments). */ + int (*enableInterrupt)(unsigned group, unsigned irq, BGCNS_InterruptType itype, unsigned core); + + /* ! @brief Disables the specified interrupt. */ + /* ! @param[in] group Specifies the Blue Gene interrupt group */ + /* ! @param[in] irq Specifies the interrupt index within the group */ + /* ! @return Returns zero (0) if the interrupt is disabled and returns non-zero if it was not */ + /* ! (including the case of bad arguments). */ + int (*disableInterrupt)(unsigned group, unsigned irq); + + /* ! @brief Queries the Blue Gene interrupt hardware for interrupts of the given */ + /* ! type and returns the group/IRQ. This service is typically used in the */ + /* ! context of an interrupt handler. Since multiple interrupt conditions */ + /* ! may be present, the service is typically invoked from the handler */ + /* ! (along with corresponding acknowledgement) until the return code */ + /* ! indicates that no more interrupts are present. */ + /* ! @param[out] group Specifies the Blue Gene interrupt group. The value is valid */ + /* ! only when the return code is 0. */ + /* ! @param[out] irq Specifies the interrupt index within the group. The value is */ + /* ! valid only when the return code is zero. */ + /* ! 
@param[in] itype Specifies the type of interrupt being queried. */ + /* ! @return Returns zero (0) if an interrupt condition of the specified type exists. Returns -1 */ + /* ! if no such condition exists. */ + int (*getInterrupt)(BGCNS_InterruptType itype, unsigned* group, unsigned* irq); + + /* ! @brief Acknowledges the specified interrupt, thus clearing the interrupt */ + /* ! condition in the interrupt controller hardware. */ + /* ! @param[in] group Specifies the Blue Gene interrupt group */ + /* ! @param[in] irq Specifies the interrupt index within the group */ + /* ! @return Returns zero (0) if the interrupt is acknowledged and returns non-zero if it was not */ + /* ! (including the case of bad arguments). */ + /* ! @remarks Note that for some interrupts, it is not sufficient to only acknowledge */ + /* ! the interrupt; the hardware condition that triggered the interrupt may */ + /* ! also need to be cleared. */ + int (*acknowledgeInterrupt)(unsigned group, unsigned irq); + + /* ! @brief Raises the specified interrupt. */ + /* ! @param[in] group Specifies the Blue Gene interrupt group */ + /* ! @param[in] irq Specifies the interrupt index within the group */ + int (*raiseInterrupt)(unsigned group, unsigned irq); + + + /*------------------------*/ + /*--- Mailbox services ---*/ + /*------------------------*/ + + unsigned (*getMailboxMaximumConsoleInputSize)(void); //!< Returns the actual maximum console message input data size. + unsigned (*getMailboxMaximumConsoleOutputSize)(void); //!< Returns the actual maximum console message output data size. + + /* ! @brief Writes a text message to the output mailbox. */ + /* ! @param[in] msg a pointer to the message to be written. */ + /* ! @param[in] msglen the length (in bytes) of the message to be written. */ + /* ! @remarks As with all common services, the message data area must be mapped via */ + /* ! the TLB when the service is called. The behavior is not defined if this */ + /* ! is not the case. */ + /* ! 
@return Zero (0) if the message was written successfully, non-zero if anything went */ + /* wrong (including a message that is too large). */ + int (*writeToMailboxConsole)(char *msg, unsigned msglen); + + /* ! @brief Writes a text message to the output mailbox but does not wait for a */ + /* ! response back from the control system. When this service is used, */ + /* ! the caller must poll for completion using the testForOutboxCompletion */ + /* ! service. */ + /* ! @param[in] msg a pointer to the message to be written. */ + /* ! @param[in] msglen the length (in bytes) of the message to be written. */ + /* ! @remarks As with all common services, the message data area must be mapped via */ + /* ! the TLB when the service is called. The behavior is not defined if this */ + /* ! is not the case. */ + /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */ + /* wrong (including a message that is too large). */ + int (*writeToMailboxConsole_nonBlocking)(char* msg, unsigned msglen); + + /* ! @brief Tests the outbox to see if the last message was picked up by the control */ + /* ! system. */ + /* ! @return Zero (0) if the last message was picked up and returns non-zero if it has not. */ + /* ! @remarks Typically the caller will invoke this service after having called */ + /* ! writeToMailboxConsole_nonBlocking and will then invoke this service in a */ + /* ! loop until zero is returned. */ + int (*testForOutboxCompletion)(void); + + /* ! @brief Reads a message from the input mail box. */ + /* ! @param msg a pointer to a data area into which the message will be placed. */ + /* ! @param maxMsgSize gives the size of the data area, i.e. the largest message */ + /* ! that may be safely received into the buffer. */ + /* ! @return The actual length of the message (0 if no message was received). */ + /* ! @remarks As with all common services, the message data area must be mapped */ + /* ! via the TLB when this service is called. 
The results are not defined if */ + /* ! this is not the case. */ + unsigned (*readFromMailboxConsole)(char *buf, unsigned bufsize); + + int (*testInboxAttention)(void); //!< Returns 1 if something is available in the input mailbox. + + int (*_no_longer_in_use_1_)(void); //!< Obsolete ... do not use. + + int (*writeToMailbox)(void* message, unsigned length, unsigned cmd); + + /*------------------------------------*/ + /*--- RAS and diagnostic services ---*/ + /*------------------------------------*/ + + /* ! @brief TBD */ + void (*machineCheck)(void *regs); + + /* ! @brief Writes a RAS event to the log. */ + /* ! @param[in] facility The facility (aka component). */ + /* ! @param[in] unit The unit (aka subcomponent). */ + /* ! @param[in] err_code The error code. */ + /* ! @param[in] numDetails The number of additional details. */ + /* ! @param[in] details The list of additional details. */ + /* ! @return Zero if the message was written, non-zero if some error condition occurred. */ + /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */ + int (*writeRASEvent)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] ); + + /* ! @brief Writes a RAS string to the log. */ + /* ! @param[in] facility The facility (aka component). */ + /* ! @param[in] unit The unit (aka subcomponent). */ + /* ! @param[in] err_code The error code. */ + /* ! @param[in] str The message string being written (ASCII encoded, null-terminated). Note that the length of this string is */ + /* ! limited to _BGP_RAS_ASCII_MAX_LEN characters. The implementation may choose to truncate the string if it exceeds this */ + /* ! length. */ + /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */ + /* ! string was truncated). */ + /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. 
*/ + int (*writeRASString)( unsigned facility, unsigned unit, unsigned short err_code, char* str ); + + + /*---------------------------------*/ + /*--- Global Interrupt services ---*/ + /*---------------------------------*/ + + /* ! @brief A global (compute node) barrier. This call will block until all other compute nodes */ + /* ! in the partition also arrive at the barrier. */ + int (*globalBarrier)(void); + + /* ! @brief A global (compute node) barrier. This call will block until all other compute nodes */ + /* ! in the partition also arrive at the barrier or until the timeout is reached. */ + /* ! @param timeoutInMillis specifies the timeout duration. Units are milliseconds. */ + /* ! @return BGCNS_RC_COMPLETE if the barrier completed. BGCNS_RC_TIMEOUT if the barrier timed */ + /* ! out. BGCNS_RC_ERROR if some other error occurred. */ + int (*globalBarrierWithTimeout)(unsigned timeoutInMillis); + + + + /*-------------------------*/ + /*--- Network services ---*/ + /*-------------------------*/ + + + void (*initializeNetworks)(void); //!< @todo Is this is going away??? Talk to Andy + + void (*_no_longer_in_use_381)(void); //!< @warning Do not use + + void (*_no_longer_in_use_384)(void);//!< @warning Do not use + + + /*--------------------------*/ + /*--- DMA unit services ---*/ + /*--------------------------*/ + +#define BGCNS_DMA_CAPTURE_X_PLUS 0 //!< watch the X+ receiver +#define BGCNS_DMA_CAPTURE_X_MINUS 1 //!< watch the X- receiver +#define BGCNS_DMA_CAPTURE_Y_PLUS 2 //!< watch the Y+ receiver +#define BGCNS_DMA_CAPTURE_Y_MINUS 3 //!< watch the Y- receiver +#define BGCNS_DMA_CAPTURE_Z_PLUS 4 //!< watch the Z+ receiver +#define BGCNS_DMA_CAPTURE_Z_MINUS 5 //!< watch the Z- receiver +#define BGCNS_DMA_CAPTURE_DISABLE 7 //!< disable link capturing + + /* ! @brief Sets the link capture facility of the DMA unit to watch the specified */ + /* ! receiver (or disable). */ + /* ! @param[in] link Specifies the link being monitored. 
Use the BGCNS_DMA_CAPTURE_* */ + /* ! mnemonics defined above. */ + /* ! @return Zero if the operation succeeded, non-zero if it did not (e.g. an invalid */ + /* ! link was specified). */ + int (*setDmaLinkCapture)(int link); + + /* ! @brief Clears the link capture unit so that another packet can be captured. */ + void (*clearDmaLinkCapture)(void); + +#define BGCNS_RC_DMA_NO_PACKET_CAPTURED 0 +#define BGCNS_RC_DMA_CAPTURE_UNIT_ERROR -1 +#define BGCNS_RC_DMA_DATA_CONFLICT -2 //!< if initial read indicates a bad packet is captured but subsequent read shows bad packet not captured +#define BGCNS_RC_DMA_DATA_CONFLICT2 -3 //!< if bad packet is captured, but all the bytes are the same + /* ! @brief Reads the DMA link capture packets. */ + int (*readDmaLinkCapturePackets)(unsigned char* good_packet, int* good_packet_size, unsigned char* bad_packet, int* bad_packet_size); + + +#define BGCNS_DMA_ALL_GROUPS 0xFFFFFFFF + + /* ! @brief Sets FIFO controls for the DMA unit. */ + /* ! */ + /* ! An operation on facility BGCNS_InjectionFifo enables or disables a subset of the 128 DMA injection FIFOs. */ + /* ! The FIFOs are organized into four groups of 32. The mask argument is a bit mask (bit i controls the i-th imFIFO */ + /* ! within that group, that is the (group*32)+i imFIFO. */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionFifo enables or disables a subset of the 32 DMA reception FIFOs. */ + /* ! The group argument is ignored and the mask argument is a bit mask (bit i controls the i-th reception FIFO). */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionHeaderFifo enables or disables the header FIFO for the specified */ + /* ! group. The mask argument is ignored. Note that the header FIFO is typically used for debugging. */ + /* ! */ + /* ! An operation on facility BGCNS_InjectionFifoInterrupt enables or disables threshold interrupts for the */ + /* ! specified injection FIFO. 
Threshold interrupts occur if available space is less than the configured */ + /* ! threshold when the FIFO is used for a remote get operation. The group and mask arguments are as */ + /* ! described in the BGCNS_InjectionFifo operation (above). */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionFifoInterrupt enables or disables interrupts for the specified */ + /* ! reception FIFO(s). If enabled, an interrupt will occur when the reception FIFO's available space drops */ + /* ! below the configured threshold. The group argument selects the interrupt type (type 0, 1, 2 or 3). */ + /* ! The mask argument is a bit mask selecting one or more of the 32 normal reception FIFOs. */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionHeaderFifoInterrupt enables or disables interrupts for the specified */ + /* ! reception header FIFO. Reception header FIFOs are used for debug purposes only. */ + /* ! */ + /* ! An operation on facility BGCNS_InjectionCounterInterrupt enables or disables "Counter Hit Zero" interrupts. */ + /* ! The group argument does not specify counter group, but rather specifies interrupt 0, 1, 2 or 3. The mask */ + /* ! argument is a bit mask that selects one or more counter subgroups to operate on (the 256 injection counters */ + /* ! are partitioned into 32 subgroups of 8 counters). */ + /* ! */ + /* ! An operation on facility BGCNS_ReceptionCounterInterrupt enables or disables "Counter Hit Zero" interrupts */ + /* ! for reception counters. The group and mask arguments are the same as described in the */ + /* ! BGCNS_InjectionCounterInterrupt operation (above). */ + /* ! */ + /* ! The buffer argument is used as a means to save/restore in an opaque manner. This is achieved by passing */ + /* ! a non-NULL buffer to a disable operation and subsequently passing that buffer during a reenable */ + /* ! operation (the buffer is used to snapshot state). */ + /* ! */ + /* ! */ + /* ! @code */ + /* ! 
+---------------------------------+-----------+---------+-------+ */ + /* ! | Facility | group | mask | Notes | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_InjectionFifo | 0..3 | 32 bits | [1] | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_ReceptionFifo | n/a | 32 bits | [2] | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_ReceptionHeaderFifo | 0..3, ALL | N/A | | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_InjectionFifoInterrupt | 0..3 | 32 bits | [1] | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_ReceptionFifoInterrupt | 0..3 | 32 bits | [3] | */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_InjectionCounterInterrupt | 0..3 | 32 bits | [3][4]| */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! | BGCNS_ReceptionCounterInterrupt | 0..3 | 32 bits | [3][4]| */ + /* ! +---------------------------------+-----------+---------+-------+ */ + /* ! */ + /* ! [1] There are 128 injection FIFOs partitioned into 4 groups of 32. */ + /* ! [2] There are 32 normal reception FIFOs in BG/P. */ + /* ! [3] There are 4 interrupt lines. The group argument selects one these 4. */ + /* ! [4] There are 256 counters of each type (injection and reception). The */ + /* ! 32-bit mask partitions them into groups of 8. */ + /* ! */ + /* ! @endcode */ + /* ! */ + /* ! @param[in] operation defines the type of operation being performed (enable, disable, or re-enable). */ + /* ! @param[in] facility defines the type of FIFO being configured. */ + /* ! @param[in] group is interpreted differently based on the facility. */ + /* ! @param[in] mask is interpreted differently based on the facility. */ + /* ! 
@param[out] buffer is interpreted differently based on the operation and facility. It is generally used to capture */ + /* ! a copy of the facility's current state in an enable operation (and may be null, in which case it is ignored). It is */ + /* ! generally used as the value to be loaded in a re-enable operation. In this manner, a state value captured by an enable */ + /* ! operation may be easily restored by a subsequent re-enable operation. The buffer argument is generally ignored by */ + /* ! disable operations. */ + int (*setDmaFifoControls)(BGCNS_FifoOperation operation, BGCNS_FifoFacility facility, unsigned group, unsigned mask, unsigned* buffer); + + /* ! @brief Maps injection FIFOs onto physical (torus hardware) FIFOs. */ + /* ! @param[in] group specifies the injection FIFO group. */ + /* ! @param[in] fifoIds is an array of length numberOfFifos whose elements are the identifiers of the imFIFO (within that */ + /* ! given group). */ + /* ! @param[in] injection_map is an array of length numberOfFifos whose elements are 8-bit masks identifying which of the */ + /* ! physical torus injection FIFOs are mapped. Bits 0-3 correspond to torus group 0, and bits 4-7 correspond to torus */ + /* ! group 1. Bits 3 and 7 are the high priority FIFOs. */ + /* ! @param[in] numberOfFifos describes the number of elements contained in the fifoIds and injection_map arguments. */ + /* ! @return Zero if the map was properly set. Non-zero if it was not, including the case of illegal arguments. */ + /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */ + /* ! argument is 0..3 and the legal range for the fifoIds[] elements is 0..31. */ + + int (*setDmaInjectionMap)(unsigned group, unsigned fifoIds[], unsigned char injection_map[], unsigned numberOfFifos); + + /* ! @brief Enables or disables "local copy" behavior for the specified injection FIFOs. A local copy injection FIFO */ + /* ! 
can be used to perform memory copies within a node via the DMA engine. */ + /* ! @param[in] operation specifies whether local copies is being enabled or disabled on the specified FIFOs. The BGCNS_Reenable */ + /* ! operation is not supported. */ + /* ! @param[in] group specifies the injection FIFO group. */ + /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */ + /* ! @return Zero if the operation succeeded; non-zero if it did not. */ + /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */ + /* ! argument is 0..3. */ + int (*setDmaLocalCopies)(BGCNS_FifoOperation operation, unsigned group, unsigned bits); + + /* ! @brief Enables or disables the priority bit for the specified injection FIFOs. The priority bit */ + /* ! is used by the hardware arbitration (details are not further documented here). */ + /* ! @param[in] operation specifies whether priority bits are being set or cleared. */ + /* ! @param[in] group specifies the injection FIFO group. */ + /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */ + /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */ + /* ! argument is 0..3. */ + int (*setDmaPriority)(BGCNS_FifoOperation operation, unsigned group, unsigned bits); + + /* ! @brief Sets the mapping from physical (torus hardware) reception FIFOs to reception FIFOs. The hardware supports */ + /* ! 8 torus FIFOs (six torus dimensions plus high priority plus local copy). Furthermore, the hardware supports */ + /* ! 4 groups as derived from the PID0 and PID1 bits of the DMA packet. Thus the mapping is a 4 x 8 matrix of */ + /* ! reception FIFO ids. */ + /* ! @param[in] torus_reception_map maps {group} X {torus-hardware-FIFOs} --> reception FIFOs. */ + /* ! 
@param[in] fifo_types is an array of N values specifying the type of each normal reception FIFO (see also threshold). For BGP, */ + /* ! N=2 (there are 32 normal reception FIFOs). */ + /* ! @param[in] header_types is an array of N values specifying the type of each reception header FIFO (see also threshold). For */ + /* ! BGP, N=4 (there are 4 reception header FIFOs). Note that reception header FIFOs are typically only used for debugging purposes. */ + /* ! @param[in] threshold is an array of N threshold values. The value threshold[i] specifies the threshold value for reception */ + /* ! FIFO type i. If reception FIFO interrupts are enabled (see setDmaFifoControls) and a reception FIFO's available space drops */ + /* ! below its threshold, an interrupt is driven. For BGP, N=2 (there are type 0 and type 1 injection FIFOs). */ + int (*setDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned header_types[], unsigned threshold[]); + + /* ! @brief Gets the reception map. */ + /* ! @see setDmaReceptionMap for descriptions of the map and arguments. */ + int (*getDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned short* store_headers, unsigned header_types[], unsigned threshold[]); + + + /* ! @deprecated */ + int (*_used_to_be_clearDmaFullReceptionFifo__removed)(void); + + + /* ! @brief Resets the MAC unit's PHY. */ + /* ! @return Zero if the unit was properly reset. Returns non-zero if some error occurred. */ + /* ! @deprecated See macResetPHY_nonBlocking. */ + int (*macResetPHY)(void); + + /* ! @brief Tests the MAC unit's link. */ + /* ! @param[in] link_type specifies the type of link to be tested. */ + /* ! @return One (1) if the link is active; zero (0) if it is not. */ + /* ! @deprecated See macTestLink_nonBlocking */ + int (*macTestLink)(BGCNS_LinkType link_type); + + /* ! @brief Reads one of the MAC's XGMII registers. */ + /* ! @param[in] device_address */ + /* ! 
@param[in] port_address */ + /* ! @param[in] register_address */ + /* ! @return The register's value or a negative number if some error occurred. */ + /* ! @deprecated Low level MAC register access is being eliminated. */ + int (*macXgmiiRead)(unsigned device_address, unsigned port_address, unsigned register_address); + + /* ! @brief Writes one of the MAC's XGMII registers. */ + /* ! @param[in] device_address */ + /* ! @param[in] port_address */ + /* ! @param[in] register_address */ + /* ! @param[in] value */ + /* ! @return Zero (0) if the register was successfully written; non-zero if some error occurred. */ + /* ! @deprecated Low level MAC register access is being eliminated. */ + int (*macXgmiiWrite)(unsigned device_address, unsigned port_address, unsigned register_address, unsigned value); + + + /* ! @brief Trains SerDes in a non-blocking manner. The standard usage is to inititate */ + /* ! training with trainSerDes(1), check the return code, and then continue to invoke */ + /* ! trainSerDes(0) as long as the return code is BGCNS_RC_CONTINUE. */ + /* ! @param[in] reset Should be 1 when initiating a retraining sequence and 0 for any */ + /* ! continuations. */ + /* ! @return BGCNS_RC_CONTINUE if training is still ongoing (the caller should re-invoke */ + /* ! the service again (with reset=0). BGCNS_RC_COMPLETE if training is complete. */ + /* ! BGCNS_ERROR if some error has occurred. */ + int (*trainSerDes)(int reset); + + /* ! @brief Fetches the value of the specified control parameter of the environmental monitor. */ + /* ! @param[in] parameter Parameter to retrieve. Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */ + /* ! @param[in] value Pointer to the storage location that will contain the parameter's value when the function successfully returns. */ + /* ! @return Zero if the register was successfully fetched; non-zero if some error occurred. */ + int (*getEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int* value); + + /* ! 
@brief Stores a value to the specified control parameter of the environmental monitor */ + /* ! @param[in] parameter Parameter to store. Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */ + /* ! @param[in] value New value for the parameter */ + /* ! @return Zero if the register was successfully fetched; non-zero if some error occurred. */ + int (*setEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int value); + + /* ! @brief Performs checks and ensures that the node will continue to operate within tolerances. */ + /* ! @note MUST be called regularly as indicated by nextCallbackTime parameter */ + /* ! @param[in] nextCallbackTime Upon returning, this will contain the PPC Timebase register value indicating when the next */ + /* ! time the operating system needs to call performEnvMgmt. Failure to do so may result in poorly performing */ + /* ! nodes or shutdown of the block / rack. */ + int (*performEnvMgmt)(unsigned long long* nextCallbackTime); + + + /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */ + /* ! response back from the control system. When this service is used, */ + /* ! the caller must poll for completion using the testForOutboxCompletion */ + /* ! service. */ + /* ! @param[in] facility The facility (aka component). See bgp_ras.h for a list of facilities. */ + /* ! @param[in] unit The unit (aka subcomponent). See bgp_ras.h for a list of units. */ + /* ! @param[in] err_code The error code. See bgp_ras.h for a list of error code.s */ + /* ! @param[in] numDetails The number of additional details. */ + /* ! @param[in] details The list of additional details. */ + /* ! @return Zero if the message was written, non-zero if some error condition occurred. */ + int (*writeRASEvent_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] ); + + /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */ + /* ! 
response back from the control system. When this service is used, */ + /* ! the caller must poll for completion using the testForOutboxCompletion */ + /* ! service. */ + /* ! @param[in] facility The facility (aka component). See bgp_ras.h for a list of facilities. */ + /* ! @param[in] unit The unit (aka subcomponent). See bgp_ras.h for a list of units. */ + /* ! @param[in] err_code The error code. See bgp_ras.h for a list of error code.s */ + /* ! @param[in] str The message string being written (ASCII encoded, null-terminated). Note that the length of this string is */ + /* ! limited to _BGP_RAS_ASCII_MAX_LEN characters. The implementation may choose to truncate the string if it exceeds this */ + /* ! length. */ + /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */ + /* ! string was truncated). */ + /* ! @return Zero if the message was written, non-zero if some error condition occurred. */ + int (*writeRASString_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, char* str ); + + /* ! @brief Sets the core's timebase registers to the specified value. */ + /* ! @param[in] newtime The new 64-bit timebase */ + /* ! @return Zero if the timebase was successfully set, non-zero if some error condition occurred. */ + /* ! @deprecated */ + int (*synchronizeTimebase)(unsigned long long newtime); + + /* ! @brief Sets the node's DMA physical protection settings. */ + /* ! @note on BGP, there are a maximum of 8 read ranges and 8 write ranges */ + /* ! @return Zero if the DMA ranges were set, non-zero if some error condition occurred. */ + int (*dmaSetRange)(unsigned numreadranges, unsigned long long* read_lower_paddr, unsigned long long* read_upper_paddr, + unsigned numwriteranges, unsigned long long* write_lower_paddr, unsigned long long* write_upper_paddr); + + /* ! @brief Checks the status of the devices and reports correctible RAS (if any) */ + /* ! 
@param[in] clear_error_counts If non-zero, function will also reset the hardware error counters after posting any RAS. */ + /* ! @return Zero if successful, non-zero if some error condition occurred. */ + int (*statusCheck)(unsigned clear_error_counts); + + /* ! @brief Stops the DMA and clears any reception unit failure */ + int (*stopDma)(void); + + /* ! @brief Starts the DMA */ + int (*startDma)(void); + + /* ! @brief Performs a hard exit. The status code is provided to the control system. */ + /* ! @return This service never returns. */ + void (*exit)(int rc); + + /* ! @brief Resets the MAC unit's PHY but does not block. */ + /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */ + /* ! reset sequence. That is, callers should initiate a reset sequence with reset=1 and then */ + /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this servicate again with */ + /* ! reset=0. */ + /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */ + /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */ + /* ! to indicate that it needs additional time. */ + /* ! @return BGCNS_RC_COMPLETE if the unit was properly reset. BGCNS_RC_CONTINUE if the reset operation is */ + /* ! not yet complete. BGCNS_RC_ERROR if the reset operation failed. */ + int (*macResetPHY_nonBlocking)(int reset, unsigned timeoutInMillis); + + /* ! @brief Tests the MAC unit's link but does not block. */ + /* ! @param[in] link_type specifies the type of link to be tested. */ + /* ! @param[out] result points to the link status, which is valid only when the return code is */ + /* ! BGCNS_RC_COMPLETE. A value of one (1) indicates that the link is active; zero (0) */ + /* ! indicates that it is inactive. */ + /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */ + /* ! test link sequence. 
That is, callers should initiate a sequence with reset=1 and then */ + /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */ + /* ! reset=0. */ + /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */ + /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */ + /* ! to indicate that it needs additional time. */ + /* ! @return BGCNS_RC_COMPLETE if the test is complete (result is valid only in this case). BGCNS_RC_CONTINUE */ + /* ! if the reset operation is not yet complete. BGCNS_RC_ERROR if the reset operation failed. */ + int (*macTestLink_nonBlocking)(BGCNS_LinkType link_type, unsigned* result, int reset, unsigned timeoutInMillis); + + void * _not_in_use_1068; + void * _not_in_use_1069; + + + /* ! @brief Indicates that a new job is about to start. */ + /* ! @return Zero (0) if CNS is ready for a new job to start. Returns non-zero otherwise. */ + int (*startNextJob)(void); + + /* ! @brief Indicates that the CNS should use the specified virtual address when accessing the */ + /* ! given device. When a device is remapped, CNS will no longer make any attempt to map */ + /* ! a TLB to access that device -- it is the responsibility of the kernel to handle the */ + /* ! TLB either proactively or reactively (via a fault). */ + /* ! @param[in] device specifies the device being mapped. */ + /* ! @param[in] base_address is the root virtual address of the device. The address should be */ + /* ! naturally aligned (relative to the size of the device). See the seciton Reserved and */ + /* ! Preferred Addresses for more information. */ + /* ! @return Zero (0) if the device was successfully remapped. Returns non-zero if it was not. */ + /* ! @remarks The lock box is in active use by CNS during early boot and thus it is not */ + /* ! possible to remap the BGCNS_LockBox device until all cores are activated by the kernel */ + /* ! 
(that is, takeCPU has been called for all cores). */ + int (*mapDevice)(BGCNS_DeviceMasks device, void* base_address); + + /* ! @brief Enables barriers on the specified channel. */ + /* ! @param channel specifies the channel being enabled. */ + /* ! @param user_mode indicates whether the barrier is to be used in user-mode code. */ + /* ! @return Zero if global barriers were enabled. Returns non-zero if the request could not be */ + /* ! completed, including the case of attempting to enable a reserved channel. */ + int (*enableBarrier)(unsigned int channel, int user_mode); + + /* ! @brief Disables barriers on the specified channel. */ + /* ! @return Zero if global barriers were disabled. Returns non-zero if the request could not be */ + /* ! completed, including the case of attempting to disable a reserved channel. */ + int (*disableBarrier)(unsigned int channel); + + /* ! @brief A global barrier that does not block indefinitely. */ + /* ! @param channel indicates the GLINT hardware channel to use. */ + /* ! @param reset indicates whether this is the beginning (1) or a continuation (0) of a barrier */ + /* ! sequence. That is, caller should initiate a barrier operation by passing reset=1 and then, */ + /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke the service again with */ + /* ! reset=0. */ + /* ! @param timeoutInMillis is the (approximate) number of milliseconds that this service is allowed */ + /* ! to wait for barrier participants before returning to the caller. */ + /* ! @return BGCNS_RC_COMPLETE indicates that all participants have arrived at the barrier. BGCNS_RC_CONTINUE */ + /* ! indicates that not all participants arrived within the allotted timeout period. BGCNS_RC_ERROR */ + /* ! indicates that some other problem has been detected. */ + /* ! @remarks This service is not thread safe. It is considered a programming error to invoke it */ + /* ! from multiple threads concurrently and the behavior is not defined. 
*/ + int (*globalBarrier_nonBlocking)(unsigned channel, int reset, unsigned timeoutInMillis); + + /* ! @brief Restart kernel in cycle reproducibility mode. */ + /* ! @return Zero if no restart was required for reproducibility. */ + /* ! @remarks This service must be called from each core and only after all I/O operations have been completed. */ + /* ! Processors will be reset and kernels will start again. */ + int (*setupReproducibility)(void); + +} BGCNS_ServiceDirectory; + +/* ! @deprecated */ +/* ! @typedef BGCNS_DeprecatedServicesDirectory */ +/* ! @struct _BGCNS_DeprecatedServices */ +/* ! @brief These services exist for historical reasons and are not further documented here. */ +/* ! They may not be available in future releases of CNS. */ +typedef struct _BGCNS_DeprecatedServices { + int (*torusTermCheck)(int* nonFatalRc); + int (*torusLinkErrCheck)(int* nonFatalRc); + int (*torusCRCExchange)(void); + int (*collectiveConfigureClassInternal)(unsigned virtualTree, unsigned short specifier); + int (*collectiveConfigureClass)(unsigned virtualTree, unsigned short specifier); + unsigned (*collectiveGetClass)(unsigned virtualTree); + int (*collectiveInit)(void); + int (*collectiveRelease)(void); + int (*collectiveHardReset)(void); + int (*netbusTermCheck)(void); + unsigned (*getSerDesLinkStatus)(void); + int (*dmaTermCheck)(void); +} BGCNS_DeprecatedServicesDirectory; + +/* ! @typedef BGCNS_Descriptor */ +/* ! @struct _BGCNS_Descriptor */ +/* ! @brief The Common Node Services descriptor. This descriptor provides information to the kernel regarding */ +/* ! the CNS memory region as well as a service directory. The descriptor is passed to the kernel */ +/* ! upon boot and must not be altered by the kernel. */ +typedef struct _BGCNS_Descriptor { + BGCNS_ServiceDirectory* services; //!< A pointer to the services directory. + unsigned baseVirtualAddress; //!< The virtual address of the beginning of the CNS memory region. 
+ unsigned size; //!< The size (in bytes) of the CNS memory region. + unsigned basePhysicalAddress; //!< The physical address of the CNS memory region. + unsigned basePhysicalAddressERPN; //!< The extended real page number of the CNS memory region. + unsigned bgcns_private_in_use; //!< Undefined. This field is for internal use only and may disappear at any time. + BGCNS_DeprecatedServicesDirectory* deprecatedServices; //!< @deprecated undocumented + unsigned version; //!< The CNS version +} BGCNS_Descriptor; + + + +#endif /* !__ASSEMBLY */ +#endif /* _BGCNS_H */ diff --git a/arch/powerpc/include/asm/bgp_personality.h b/arch/powerpc/include/asm/bgp_personality.h new file mode 100644 index 00000000000000..f4d9309640a4bf --- /dev/null +++ b/arch/powerpc/include/asm/bgp_personality.h @@ -0,0 +1,1086 @@ +/* + * Andrew Tauferner + * + * Copyright 2006, 2007 International Business Machines + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#ifndef BGP_PERSONALITY_H_ // Prevent multiple inclusion +#define BGP_PERSONALITY_H_ + + + + +/* #include <linux/types.h> */ + +// These defines allows use of IBM's bit numberings (MSb=0, LSb=31)for multi-bit fields +// b = IBM bit number of the least significant bit (highest number) +// x = value to set in field +// s = size +#define _BS(b,x,s)( ( ( x) & ( 0x7FFFFFFF>> ( 31- ( s)))) << ( 31- ( b))) +#define _BG(b,x,s)( ( _BS(b,0x7FFFFFFF,s) & x ) >> (31-b) ) +#define _BS64(b,x,s)( ( ( x) & ( 0x7FFFFFFFFFFFFFFFLL>> ( 63- ( s)))) << ( 63- ( b))) +#define _BG64(b,x,s)( ( _BS64(b, 0x7FFFFFFFFFFFFFFFLL,s) & x ) >> (63-b) ) +#define _BN(b) ((1<<(31-(b)))) +#define _B1(b,x) (((x)&0x1)<<(31-(b))) +#define _B2(b,x) (((x)&0x3)<<(31-(b))) +#define _B3(b,x) (((x)&0x7)<<(31-(b))) +#define _B4(b,x) (((x)&0xF)<<(31-(b))) +#define _B5(b,x) (((x)&0x1F)<<(31-(b))) +#define _B6(b,x) (((x)&0x3F)<<(31-(b))) +#define _B7(b,x) (((x)&0x7F)<<(31-(b))) +#define _B8(b,x) (((x)&0xFF)<<(31-(b))) +#define _B9(b,x) (((x)&0x1FF)<<(31-(b))) +#define _B10(b,x) (((x)&0x3FF)<<(31-(b))) +#define _B11(b,x) (((x)&0x7FF)<<(31-(b))) +#define _B12(b,x) (((x)&0xFFF)<<(31-(b))) +#define _B13(b,x) (((x)&0x1FFF)<<(31-(b))) +#define _B14(b,x) (((x)&0x3FFF)<<(31-(b))) +#define _B15(b,x) (((x)&0x7FFF)<<(31-(b))) +#define _B16(b,x) (((x)&0xFFFF)<<(31-(b))) +#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b))) +#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b))) +#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b))) +#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b))) +#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b))) +#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b))) +#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b))) +#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b))) +#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b))) +#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b))) +#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b))) +#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b))) +#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b))) +#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b))) +#define 
_B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b))) + +#define BGP_UCI_Component_Rack ( 0) +#define BGP_UCI_Component_Midplane ( 1) +#define BGP_UCI_Component_BulkPowerSupply ( 2) +#define BGP_UCI_Component_PowerCable ( 3) +#define BGP_UCI_Component_PowerModule ( 4) +#define BGP_UCI_Component_ClockCard ( 5) +#define BGP_UCI_Component_FanAssembly ( 6) +#define BGP_UCI_Component_Fan ( 7) +#define BGP_UCI_Component_ServiceCard ( 8) +#define BGP_UCI_Component_LinkCard ( 9) +#define BGP_UCI_Component_LinkChip (10) +#define BGP_UCI_Component_LinkPort (11) // Identifies 1 end of a LinkCable +#define BGP_UCI_Component_NodeCard (12) +#define BGP_UCI_Component_ComputeCard (13) +#define BGP_UCI_Component_IOCard (14) +#define BGP_UCI_Component_DDRChip (15) +#define BGP_UCI_Component_ENetConnector (16) + +typedef struct BGP_UCI_Rack_t + { // "Rxy": R<RackRow><RackColumn> + unsigned Component : 5; // when BGP_UCI_Component_Rack + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned _zero : 19; // zero's + } + BGP_UCI_Rack_t; + +#define BGP_UCI_RACK_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_Rack +#define BGP_UCI_RACK_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_RACK_RACKCOLUMN(x) _B4(12,x) // 0..F + + + +typedef struct BGP_UCI_Midplane_t + { // "Rxy-Mm": R<RackRow><RackColumn>-M<Midplane> + unsigned Component : 5; // when BGP_UCI_Component_Midplane + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned _zero : 18; // zero's + } + BGP_UCI_Midplane_t; + +#define BGP_UCI_MIDPLANE_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_Midplane +#define BGP_UCI_MIDPLANE_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_MIDPLANE_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_MIDPLANE_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top + + +typedef struct BGP_UCI_BulkPowerSupply_t + { // "Rxy-B": R<RackRow><RackColumn>-B + unsigned Component : 5; // when BGP_UCI_Component_BulkPowerSupply + unsigned RackRow : 4; // 0..F + 
unsigned RackColumn : 4; // 0..F + unsigned _zero : 19; // zero's + } + BGP_UCI_BulkPowerSupply_t; + +#define BGP_UCI_BULKPOWERSUPPLY_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_BulkPowerSupply +#define BGP_UCI_BULKPOWERSUPPLY_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_BULKPOWERSUPPLY_RACKCOLUMN(x) _B4(12,x) // 0..F + + + +typedef struct BGP_UCI_PowerCable_t + { // "Rxy-B-C": R<RackRow><RackColumn>-B-C + unsigned Component : 5; // when BGP_UCI_Component_PowerCable + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned _zero : 19; // zero's + } + BGP_UCI_PowerCable_t; + +#define BGP_UCI_POWERCABLE_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_PowerCable +#define BGP_UCI_POWERCABLE_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_POWERCABLE_RACKCOLUMN(x) _B4(12,x) // 0..F + + + +typedef struct BGP_UCI_PowerModule_t + { // "Rxy-B-Pp": R<RackRow><RackColumn>-B-P<PowerModule> + unsigned Component : 5; // when BGP_UCI_Component_PowerModule + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned PowerModule : 3; // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear) + unsigned _zero : 16; // zero's + } + BGP_UCI_PowerModule_t; + +#define BGP_UCI_POWERMODULE_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_PowerModule +#define BGP_UCI_POWERMODULE_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_POWERMODULE_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_POWERMODULE_POWERMODULE(x) _B3(15,x) // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear) + + +typedef struct BGP_UCI_ClockCard_t + { // "Rxy-K": R<RackRow><RackColumn>-K + unsigned Component : 5; // when BGP_UCI_Component_ClockCard + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned _zero : 19; // zero's + } + BGP_UCI_ClockCard_t; + +#define BGP_UCI_CLOCKCARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_PowerModule +#define BGP_UCI_CLOCKCARD_RACKROW(x) _B4( 8,x) // 0..F +#define 
BGP_UCI_CLOCKCARD_RACKCOLUMN(x) _B4(12,x) // 0..F + + + +typedef struct BGP_UCI_FanAssembly_t + { // "Rxy-Mm-Aa": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly> + unsigned Component : 5; // when BGP_UCI_Component_FanAssembly + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned FanAssembly : 4; // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear) + unsigned _zero : 14; // zero's + } + BGP_UCI_FanAssembly_t; + +#define BGP_UCI_FANASSEMBLY_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_FanAssembly +#define BGP_UCI_FANASSEMBLY_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_FANASSEMBLY_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_FANASSEMBLY_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_FANASSEMBLY_FANASSEMBLY(x) _B4(17,x) // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear) + + + +typedef struct BGP_UCI_Fan_t + { // "Rxy-Mm-Aa-Ff": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>-F<Fan> + unsigned Component : 5; // when BGP_UCI_Component_Fan + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned FanAssembly : 4; // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear) + unsigned Fan : 2; // 0..2 (0=Tailstock, 2=Midplane) + unsigned _zero : 12; // zero's + } + BGP_UCI_Fan_t; + +#define BGP_UCI_FAN_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_Fan +#define BGP_UCI_FAN_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_FAN_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_FAN_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_FAN_FANASSEMBLY(x) _B4(17,x) // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear) +#define BGP_UCI_FAN_FAN(x) _B2(19,x) // 0..2 (0=Tailstock, 2=Midplane) + +typedef struct BGP_UCI_ServiceCard_t + { // "Rxy-Mm-S": R<RackRow><RackColumn>-M<Midplane>-S + unsigned Component : 5; // when BGP_UCI_Component_ServiceCard + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 
0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top (Master ServiceCard in M0) + unsigned _zero : 18; // zero's + } + BGP_UCI_ServiceCard_t; + +#define BGP_UCI_SERVICECARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_ServiceCard +#define BGP_UCI_SERVICECARD_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_SERVICECARD_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_SERVICECARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top (Master ServiceCard in M0) + + + +typedef struct BGP_UCI_LinkCard_t + { // "Rxy-Mm-Ll": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard> + unsigned Component : 5; // when BGP_UCI_Component_LinkCard + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned LinkCard : 2; // 0..3: 0=BF, 1=TF, 2=BR, 3=TR) + unsigned _zero : 16; // zero's + } + BGP_UCI_LinkCard_t; + +#define BGP_UCI_LINKCARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_LinkCard +#define BGP_UCI_LINKCARD_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_LINKCARD_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_LINKCARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_LINKCARD_LINKCARD(x) _B2(15,x) // 0..3: 0=BF, 1=TF, 2=BR, 3=TR) + + + +typedef struct BGP_UCI_LinkChip_t + { // "Rxy-Mm-Ll-Uu": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-U<LinkChip> + unsigned Component : 5; // when BGP_UCI_Component_LinkChip + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned LinkCard : 2; // 0..3: 0=BF, 1=TF, 2=BR, 3=TR) + unsigned LinkChip : 3; // 00..05: left to right from Front + unsigned _zero : 13; // zero's + } + BGP_UCI_LinkChip_t; + +#define BGP_UCI_LINKCHIP_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_LinkChip +#define BGP_UCI_LINKCHIP_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_LINKCHIP_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_LINKCHIP_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_LINKCHIP_LINKCARD(x) _B2(15,x) // 0..3: 0=BF, 1=TF, 
2=BR, 3=TR) +#define BGP_UCI_LINKCHIP_LINKCHIP(x) _B3(18,x) // 00..05: left to right from Front + +typedef struct BGP_UCI_LinkPort_t + { // "Rxy-Mm-Ll-Jjj": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-J<LinkPort> + unsigned Component : 5; // when BGP_UCI_Component_LinkPort + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned LinkCard : 2; // 0..3: 0=BF, 1=TF, 2=BR, 3=TR) + unsigned LinkPort : 4; // 00..15: left to right from Front + unsigned _zero : 12; // zero's + } + BGP_UCI_LinkPort_t; + +#define BGP_UCI_LINKPORT_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_LinkPort +#define BGP_UCI_LINKPORT_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_LINKPORT_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_LINKPORT_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_LINKPORT_LINKCARD(x) _B2(15,x) // 0..3: 0=BF, 1=TF, 2=BR, 3=TR) +#define BGP_UCI_LINKPORT_LINKPORT(x) _B4(19,x) // 00..15: left to right from Front + + +typedef struct BGP_UCI_NodeCard_t + { // "Rxy-Mm-Nnn": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard> + unsigned Component : 5; // when BGP_UCI_Component_NodeCard + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) + unsigned _zero : 14; // zero's + } + BGP_UCI_NodeCard_t; + +#define BGP_UCI_NODECARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_NodeCard +#define BGP_UCI_NODECARD_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_NODECARD_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_NODECARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_NODECARD_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) + + + +typedef struct BGP_UCI_ComputeCard_t + { // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard> + unsigned Component : 5; // when BGP_UCI_Component_ComputeCard + unsigned RackRow : 4; // 0..F + unsigned 
RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) + unsigned ComputeCard : 6; // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard) + unsigned _zero : 8; // zero's + } + BGP_UCI_ComputeCard_t; + +#define BGP_UCI_COMPUTECARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_ComputeCard +#define BGP_UCI_COMPUTECARD_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_COMPUTECARD_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_COMPUTECARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_COMPUTECARD_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) +#define BGP_UCI_COMPUTECARD_COMPUTECARD(x) _B6(23,x) // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard) + + +typedef struct BGP_UCI_IOCard_t + { // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard> + unsigned Component : 5; // when BGP_UCI_Component_IOCard + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) + unsigned ComputeCard : 6; // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard) + unsigned _zero : 8; // zero's + } + BGP_UCI_IOCard_t; + +#define BGP_UCI_IOCARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_IOCard +#define BGP_UCI_IOCARD_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_IOCARD_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_IOCARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_IOCARD_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) +#define BGP_UCI_IOCARD_COMPUTECARD(x) _B6(23,x) // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard) + + + +typedef struct BGP_UCI_DDRChip_t + { // "Rxy-Mm-Nnn-Jxx-Uuu": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>-U<DDRChip> + unsigned Component : 5; // when BGP_UCI_Component_DDRChip + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned 
Midplane : 1; // 0=Bottom, 1=Top + unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) + unsigned ComputeCard : 6; // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard) + unsigned DDRChip : 5; // 00..20 + unsigned _zero : 3; // zero's + } + BGP_UCI_DDRChip_t; + +#define BGP_UCI_DDRCHIP_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_DDRChip +#define BGP_UCI_DDRCHIP_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_DDRCHIP_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_DDRCHIP_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_DDRCHIP_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) +#define BGP_UCI_DDRCHIP_COMPUTECARD(x) _B6(23,x) // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard) +#define BGP_UCI_DDRCHIP_DDRCHIP(x) _B5(28,x) // 00..20 + + +typedef struct BGP_UCI_ENetConnector_t + { // "Rxy-Mm-Nnn-ENe": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-EN<EN> + unsigned Component : 5; // when BGP_UCI_Component_ENetConnector + unsigned RackRow : 4; // 0..F + unsigned RackColumn : 4; // 0..F + unsigned Midplane : 1; // 0=Bottom, 1=Top + unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) + unsigned EN : 1; // 0..1 (Equal to IOCard number) + unsigned _zero : 13; // zero's + } + BGP_UCI_ENetConnector_t; + +#define BGP_UCI_ENETCONNECTOR_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_ENetConnector +#define BGP_UCI_ENETCONNECTOR_RACKROW(x) _B4( 8,x) // 0..F +#define BGP_UCI_ENETCONNECTOR_RACKCOLUMN(x) _B4(12,x) // 0..F +#define BGP_UCI_ENETCONNECTOR_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top +#define BGP_UCI_ENETCONNECTOR_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR) +#define BGP_UCI_ENETCONNECTOR_ENETCONNECTOR(x) _B1(18,x) // 0..1 (Equal to IOCard number) + + + +typedef union TBGP_UniversalComponentIdentifier + { + uint32_t UCI; + BGP_UCI_Rack_t Rack; + BGP_UCI_Midplane_t Midplane; + BGP_UCI_BulkPowerSupply_t BulkPowerSupply; + BGP_UCI_PowerCable_t PowerCable; + BGP_UCI_PowerModule_t PowerModule; + 
BGP_UCI_ClockCard_t ClockCard; + BGP_UCI_FanAssembly_t FanAssembly; + BGP_UCI_Fan_t Fan; + BGP_UCI_ServiceCard_t ServiceCard; + BGP_UCI_LinkCard_t LinkCard; + BGP_UCI_LinkChip_t LinkChip; + BGP_UCI_LinkPort_t LinkPort; + BGP_UCI_NodeCard_t NodeCard; + BGP_UCI_ComputeCard_t ComputeCard; + BGP_UCI_IOCard_t IOCard; + BGP_UCI_DDRChip_t DDRChip; + BGP_UCI_ENetConnector_t ENetConnector; + } + BGP_UniversalComponentIdentifier; + + + +#define BGP_PERSONALITY_VERSION (0x0A) + +#define BGP_DEFAULT_FREQ (850) + +#define BGP_PERS_PROCESSCONFIG_DIAGS (0xFF000000) // Diagnostic Mode: All Cores Enabled and Privileged in Process 0 +#define BGP_PERS_PROCESSCONFIG_SMP (0x0F000000) // All Cores Enabled User-Space in Process 0 +#define BGP_PERS_PROCESSCONFIG_VNM (0x08040201) // 4 Single-Core Processes (a.k.a. Virtual Nodes) +#define BGP_PERS_PROCESSCONFIG_2x2 (0x0C030000) // 2 Processes of 2 Cores each in same DP unit +#define BGP_PERS_PROCESSCONFIG_2x2_CROSS1 (0x09060000) // 2 Processes of 2 Cores in different DP units +#define BGP_PERS_PROCESSCONFIG_2x2_CROSS2 (0x0A050000) // 2 Processes of 2 Cores in different DP units +#define BGP_PERS_PROCESSCONFIG_3PLUS1 (0x0E010000) // 3 Cores in one Processes, 4th Core in Separate Process +#define BGP_PERS_PROCESSCONFIG_DEFAULT (BGP_PERS_PROCESSCONFIG_DIAGS) + + +// Personality.Kernel_Config.RASPolicy +#define BGP_PERS_RASPOLICY_VERBOSITY(x) _B2( 1,x) // Verbosity as shown below +#define BGP_PERS_RASPOLICY_MINIMAL BGP_PERS_RASPOLICY_VERBOSITY(0) // Benchmarking Level of Capture and Reporting +#define BGP_PERS_RASPOLICY_NORMAL BGP_PERS_RASPOLICY_VERBOSITY(1) // Normal Production Level of Capture and Reporting +#define BGP_PERS_RASPOLICY_VERBOSE BGP_PERS_RASPOLICY_VERBOSITY(2) // Manufacturing Test and Diagnostics +#define BGP_PERS_RASPOLICY_EXTREME BGP_PERS_RASPOLICY_VERBOSITY(3) // Report Every Event Immediately - Thresholds set to 1 +#define BGP_PERS_RASPOLICY_FATALEXIT _BN( 2) // Fatal is Fatal, so exit. 
+ +#define BGP_PERS_RASPOLICY_DEFAULT (BGP_PERS_RASPOLICY_VERBOSE | BGP_PERS_RASPOLICY_FATALEXIT) + + +#define BGP_PERSONALITY_LEN_NFSDIR (32) // 32bytes + +#define BGP_PERSONALITY_LEN_SECKEY (32) // 32bytes + +// Personality.NodeConfig Driver Enables and Configurations +#define BGP_PERS_ENABLE_Simulation _BN( 0) // Running on VHDL Simulation +#define BGP_PERS_ENABLE_LockBox _BN( 1) +#define BGP_PERS_ENABLE_BIC _BN( 2) +#define BGP_PERS_ENABLE_DDR _BN( 3) // DDR Controllers (not Fusion DDR model) +#define BGP_PERS_ENABLE_LoopBack _BN( 4) // LoopBack: Internal TS/TR or SerDes Loopback +#define BGP_PERS_ENABLE_GlobalInts _BN( 5) +#define BGP_PERS_ENABLE_Collective _BN( 6) // Enable Collective Network +#define BGP_PERS_ENABLE_Torus _BN( 7) +#define BGP_PERS_ENABLE_TorusMeshX _BN( 8) // Torus is a Mesh in the X-dimension +#define BGP_PERS_ENABLE_TorusMeshY _BN( 9) // Torus is a Mesh in the Y-dimension +#define BGP_PERS_ENABLE_TorusMeshZ _BN(10) // Torus is a Mesh in the Z-dimension +#define BGP_PERS_ENABLE_TreeA _BN(11) // Enable Collective Network A-link +#define BGP_PERS_ENABLE_TreeB _BN(12) // Enable Collective Network B-link +#define BGP_PERS_ENABLE_TreeC _BN(13) // Enable Collective Network C-link +#define BGP_PERS_ENABLE_DMA _BN(14) +#define BGP_PERS_ENABLE_SerDes _BN(15) +#define BGP_PERS_ENABLE_UPC _BN(16) +#define BGP_PERS_ENABLE_EnvMon _BN(17) +#define BGP_PERS_ENABLE_Ethernet _BN(18) +#define BGP_PERS_ENABLE_JTagLoader _BN(19) // Converse with JTag Host to load kernel +#define BGP_PERS_ENABLE_MailBoxReceive BGP_PERS_ENABLE_JTagLoader +#define BGP_PERS_ENABLE_PowerSave _BN(20) // Turn off unused devices (Eth on CN, TS on ION) +#define BGP_PERS_ENABLE_FPU _BN(21) // Enable Double-Hummers (not supported in EventSim) +#define BGP_PERS_ENABLE_StandAlone _BN(22) // Disable "CIOD" interface, Requires Collective! 
+#define BGP_PERS_ENABLE_TLBMisses _BN(23) // TLB Misses vs Wasting Memory (see bgp_AppSetup.c) +#define BGP_PERS_ENABLE_Mambo _BN(24) // Running under Mambo? Used by Linux +#define BGP_PERS_ENABLE_TreeBlast _BN(25) // Enable Tree "Blast" mode +#define BGP_PERS_ENABLE_BlindStacks _BN(26) // For "XB" Tests, Lock 16K Stacks in Blind Device +#define BGP_PERS_ENABLE_CNK_Malloc _BN(27) // Enable Malloc Support in CNK. +#define BGP_PERS_ENABLE_Reproducibility _BN(28) // Enable Cycle Reproducibility +#define BGP_PERS_ENABLE_HighThroughput _BN(29) // Enable high throughput computing mode +#define BGP_PERS_ENABLE_DiagnosticsMode _BN(30) // Enable diagnostics mode + +// Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back) +// This overrides most L1, L2, and Snoop settings. Carefull!! +#define BGP_PERS_ENABLE_BGLMODE _BN(31) // (not yet fully implemented) + +// Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave +#define BGP_PERS_NODECONFIG_DEFAULT (BGP_PERS_ENABLE_Simulation |\ + BGP_PERS_ENABLE_LockBox |\ + BGP_PERS_ENABLE_BIC |\ + BGP_PERS_ENABLE_DDR |\ + BGP_PERS_ENABLE_LoopBack |\ + BGP_PERS_ENABLE_GlobalInts |\ + BGP_PERS_ENABLE_Collective |\ + BGP_PERS_ENABLE_Torus |\ + BGP_PERS_ENABLE_UPC |\ + BGP_PERS_ENABLE_EnvMon |\ + BGP_PERS_ENABLE_FPU |\ + BGP_PERS_ENABLE_StandAlone) + +// Default Setup for Hardware: +// Supports Stand-Alone CNA Applications. 
+// Bootloader-Extensions and XB's must turn-off JTagLoader +#define BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (BGP_PERS_ENABLE_JTagLoader |\ + BGP_PERS_ENABLE_LockBox |\ + BGP_PERS_ENABLE_BIC |\ + BGP_PERS_ENABLE_DDR |\ + BGP_PERS_ENABLE_GlobalInts |\ + BGP_PERS_ENABLE_Collective |\ + BGP_PERS_ENABLE_SerDes |\ + BGP_PERS_ENABLE_UPC |\ + BGP_PERS_ENABLE_EnvMon |\ + BGP_PERS_ENABLE_FPU |\ + BGP_PERS_ENABLE_StandAlone) + +// these fields are defined by the control system depending on compute/io node +// BGP_PERS_ENABLE_Torus | +// BGP_PERS_ENABLE_TorusMeshX | +// BGP_PERS_ENABLE_TorusMeshY | +// BGP_PERS_ENABLE_TorusMeshZ | + + + +// Personality.L1Config: Controls and Settings for L1 Cache +#define BGP_PERS_L1CONFIG_L1I _BN( 0) // L1 Enabled for Instructions +#define BGP_PERS_L1CONFIG_L1D _BN( 1) // L1 Enabled for Data +#define BGP_PERS_L1CONFIG_L1SWOA _BN( 2) // L1 Store WithOut Allocate +#define BGP_PERS_L1CONFIG_L1Recovery _BN( 3) // L1 Full Recovery Mode +#define BGP_PERS_L1CONFIG_L1WriteThru _BN( 4) // L1 Write-Thru (not svc_host changeable (yet?)) +#define BGP_PERS_L1CONFIG_DO_L1ITrans _BN( 5) // Enable L1 Instructions Transient? +#define BGP_PERS_L1CONFIG_DO_L1DTrans _BN( 6) // Enable L1 Data Transient? 
+ // unused 9bits: 7..15 +#define BGP_PERS_L1CONFIG_L1ITrans(x) _B8(23,x) // L1 Transient for Instructions in Groups of 16 Lines +#define BGP_PERS_L1CONFIG_L1DTrans(x) _B8(31,x) // L1 Transient for Data in Groups of 16 Lines + +#define BGP_PERS_L1CONFIG_DEFAULT (BGP_PERS_L1CONFIG_L1I |\ + BGP_PERS_L1CONFIG_L1D |\ + BGP_PERS_L1CONFIG_L1SWOA |\ + BGP_PERS_L1CONFIG_L1Recovery |\ + BGP_PERS_L1CONFIG_L1WriteThru) + +typedef union TBGP_Pers_L1Cfg + { + uint32_t l1cfg; + struct { + unsigned l1i : 1; + unsigned l1d : 1; + unsigned l1swoa : 1; + unsigned l1recovery : 1; + unsigned l1writethru : 1; + unsigned do_l1itrans : 1; + unsigned do_l1dtrans : 1; + unsigned l1rsvd : 9; + unsigned l1itrans : 8; + unsigned l1dtrans : 8; + }; + } + BGP_Pers_L1Cfg; + +// Personality.L2Config: Controls and Settings for L2 and Snoop +#define BGP_PERS_L2CONFIG_L2I _BN( 0) // L2 Instruction Caching Enabled +#define BGP_PERS_L2CONFIG_L2D _BN( 1) // L2 Data Caching Enabled +#define BGP_PERS_L2CONFIG_L2PF _BN( 2) // L2 Automatic Prefetching Enabled +#define BGP_PERS_L2CONFIG_L2PFO _BN( 3) // L2 Optimistic Prefetching Enabled +#define BGP_PERS_L2CONFIG_L2PFA _BN( 4) // L2 Aggressive Prefetching Enabled (fewer deeper streams) +#define BGP_PERS_L2CONFIG_L2PFS _BN( 5) // L2 Aggressive Many-Stream Prefetching Enabled (deeper only when available buffers) +#define BGP_PERS_L2CONFIG_Snoop _BN( 6) // Just NULL Snoop Filter +#define BGP_PERS_L2CONFIG_SnoopCache _BN( 7) // Snoop Caches +#define BGP_PERS_L2CONFIG_SnoopStream _BN( 8) // Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata) +#define BGP_PERS_L2CONFIG_SnoopRange _BN( 9) // Snoop Range Filter when possible +#define BGP_PERS_L2CONFIG_BUG824LUMPY _BN(10) // BPC_BUGS 824: Fix with Lumpy Performance +#define BGP_PERS_L2CONFIG_BUG824SMOOTH _BN(11) // BPC_BUGS 824: Fix with Smooth Performance, but -12% Memory +#define BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12) // Special for Snoop diagnostics. 
See bgp_vmm.c + // additional bits may be used for Snoop setting tweaks + +// Default L2 Configuration: +// L2 Enabled with Multi-Stream Aggressive Prefetching +// Snoop Enabled with all filters except Range +#define BGP_PERS_L2CONFIG_DEFAULT (BGP_PERS_L2CONFIG_L2I |\ + BGP_PERS_L2CONFIG_L2D |\ + BGP_PERS_L2CONFIG_L2PF |\ + BGP_PERS_L2CONFIG_L2PFO |\ + BGP_PERS_L2CONFIG_L2PFS |\ + BGP_PERS_L2CONFIG_Snoop |\ + BGP_PERS_L2CONFIG_SnoopCache |\ + BGP_PERS_L2CONFIG_SnoopStream|\ + BGP_PERS_L2CONFIG_BUG824LUMPY) + + +// Personality.L3Config: Controls and Settings for L3 +// Note: Most bits match BGP_L3x_CTRL DCRs. +// See arch/include/bpcore/bgl_l3_dcr.h +#define BGP_PERS_L3CONFIG_L3I _BN( 0) // L3 Enabled for Instructions +#define BGP_PERS_L3CONFIG_L3D _BN( 1) // L3 Enabled for Data +#define BGP_PERS_L3CONFIG_L3PFI _BN( 2) // Inhibit L3 Prefetch from DDR +#define BGP_PERS_L3CONFIG_DO_Scratch _BN( 3) // Set up Scratch? +#define BGP_PERS_L3CONFIG_DO_PFD0 _BN( 4) // Adjust PFD0? +#define BGP_PERS_L3CONFIG_DO_PFD1 _BN( 5) // Adjust PFD1? +#define BGP_PERS_L3CONFIG_DO_PFDMA _BN( 6) // Adjust PFDMA? +#define BGP_PERS_L3CONFIG_DO_PFQD _BN( 7) // Adjust PFQD? 
+ // 8..15 unused/available +#define BGP_PERS_L3CONFIG_Scratch(x) _B4(19,x) // Scratch 8ths: 0..8 +#define BGP_PERS_L3CONFIG_PFD0(x) _B3(22,x) // Prefetch Depth for DP0 +#define BGP_PERS_L3CONFIG_PFD1(x) _B3(25,x) // Prefetch Depth for DP1 +#define BGP_PERS_L3CONFIG_PFDMA(x) _B3(28,x) // Prefetch Depth for DMA +#define BGP_PERS_L3CONFIG_PFQD(x) _B3(31,x) // Prefetch Queue Depth + +// General L3 Configuration +typedef union TBGP_Pers_L3Cfg + { + uint32_t l3cfg; + struct { + unsigned l3i : 1; + unsigned l3d : 1; + unsigned l3pfi : 1; + unsigned do_scratch : 1; + unsigned do_pfd0 : 1; + unsigned do_pfd1 : 1; + unsigned do_pfdma : 1; + unsigned do_pfqd : 1; + unsigned rsvd : 8; + unsigned scratch : 4; + unsigned pfd0 : 3; + unsigned pfd1 : 3; + unsigned pfdma : 3; + unsigned pfqd : 3; + }; + } + BGP_Pers_L3Cfg; + +// Default L3 Configuration: +// L3 Enabled for Instructions and Data +// No Prefetch Depth overrides, No Scratch, No Scrambling. +#define BGP_PERS_L3CONFIG_DEFAULT (BGP_PERS_L3CONFIG_L3I |\ + BGP_PERS_L3CONFIG_L3D |\ + BGP_PERS_L3CONFIG_DO_PFDMA |\ + BGP_PERS_L3CONFIG_PFDMA(4)) + + +// L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users) +#define BGP_PERS_L3SELECT_DO_CacheSel _BN( 0) // Adjust Cache Select setting? +#define BGP_PERS_L3SELECT_DO_BankSel _BN( 1) // Adjust Bank Select setting? +#define BGP_PERS_L3SELECT_Scramble _BN( 2) // L3 Scramble +#define BGP_PERS_L3SELECT_PFby2 _BN( 3) // Prefetch by 2 if set, else by 1 (default) if clear. +#define BGP_PERS_L3SELECT_CacheSel(x) _B5( 8,x) // PhysAddr Bit for L3 Selection (0..26) +#define BGP_PERS_L3SELECT_BankSel(x) _B5(13,x) // PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel. + +typedef union TBGP_Pers_L3Select + { + uint32_t l3select; + struct { + unsigned do_CacheSel : 1; + unsigned do_BankSel : 1; + unsigned l3Scramble : 1; + unsigned l3_PF_by2 : 1; // default is PreFetch by 1. 
+ unsigned CacheSel : 5; // Physical Address Bit for L3 Selection (0..26) + unsigned BankSel : 5; // 0..26 Must be strictly greater than CacheSel. + unsigned rsvd : 18; + }; + } + BGP_Pers_L3Select; + +// Default L3 Selection Configuration: Disable overrides, but set h/w default values. +#define BGP_PERS_L3SELECT_DEFAULT (BGP_PERS_L3SELECT_CacheSel(21) |\ + BGP_PERS_L3SELECT_BankSel(26)) + +// Tracing Masks and default trace configuration +#define BGP_TRACE_CONFIG _BN( 0) // Display Encoded personality config on startup +#define BGP_TRACE_ENTRY _BN( 1) // Function enter and exit +#define BGP_TRACE_INTS _BN( 2) // Standard Interrupt Dispatch +#define BGP_TRACE_CINTS _BN( 3) // Critical Interrupt Dispatch +#define BGP_TRACE_MCHK _BN( 4) // Machine Check Dispatch +#define BGP_TRACE_SYSCALL _BN( 5) // System Calls +#define BGP_TRACE_VMM _BN( 6) // Virtual Memory Manager +#define BGP_TRACE_DEBUG _BN( 7) // Debug Events (app crashes etc) +#define BGP_TRACE_TORUS _BN( 8) // Torus Init +#define BGP_TRACE_TREE _BN( 9) // Tree Init +#define BGP_TRACE_GLOBINT _BN(10) // Global Interrupts +#define BGP_TRACE_DMA _BN(11) // DMA Setup +#define BGP_TRACE_SERDES _BN(12) // SerDes Init +#define BGP_TRACE_TESTINT _BN(13) // Test Interface, ECID, Config +#define BGP_TRACE_ETHTX _BN(14) // Ethernet Transmit +#define BGP_TRACE_ETHRX _BN(15) // Ethernet Receive +#define BGP_TRACE_POWER _BN(16) // Power Control +#define BGP_TRACE_PROCESS _BN(17) // Process/Thread Mapping +#define BGP_TRACE_EXIT_SUM _BN(18) // Report Per-Core Interrupt and Error Summary on exit() +#define BGP_TRACE_SCHED _BN(19) // Report Scheduler Information +#define BGP_TRACE_RAS _BN(20) // Report RAS Events (in addition to sending to Host) +#define BGP_TRACE_ECID _BN(21) // Report UCI and ECID on boot +#define BGP_TRACE_FUTEX _BN(22) // Trace Futex operations +#define BGP_TRACE_MemAlloc _BN(23) // Trace MMAP and Shared Memory operations +#define BGP_TRACE_WARNINGS _BN(30) // Trace Warnings +#define BGP_TRACE_VERBOSE 
_BN(31) // Verbose Tracing Modifier + +// Enable tracking of Regression Suite coverage and report UCI+ECID on boot +#define BGP_PERS_TRACE_DEFAULT (BGP_TRACE_CONFIG | BGP_TRACE_ECID) + + +typedef struct BGP_Personality_Kernel_t + { + uint32_t UniversalComponentIdentifier; // see include/common/bgp_ras.h + + uint32_t FreqMHz; // Clock_X1 Frequency in MegaHertz (eg 1000) + + uint32_t RASPolicy; // Verbosity level, and other RAS Reporting Controls + + // Process Config: + // Each byte represents a process (1 to 4 processes supported) + // No core can be assigned to more than 1 process. + // Cores assigned to no process are disabled. + // Cores with in a process share the same address space. + // Separate processes have distinct address spaces. + // Within each process (0 to 4 cores assigned to a process): + // Lower nibble is bitmask of which core belongs to that process. + // Upper nibble is bitmask whether that thread is privileged or user. + // Processes with zero cores do not exist. + // E.g., for Diagnostics, we use 0xFF000000, which means + // that all 4 cores run privileged in process 0. 
+ uint32_t ProcessConfig; + + uint32_t TraceConfig; // Kernel Tracing Enables + uint32_t NodeConfig; // Kernel Driver Enables + uint32_t L1Config; // L1 Config and setup controls + uint32_t L2Config; // L2 and Snoop Config and setup controls + uint32_t L3Config; // L3 Config and setup controls + uint32_t L3Select; // L3 Cache and Bank Selection controls + + uint32_t SharedMemMB; // Memory to Reserve for Sharing among Processes + + uint32_t ClockStop0; // Upper 11Bits of ClockStop, enabled if Non-zero + uint32_t ClockStop1; // Lower 32Bits of ClockStop, enabled if Non-zero + } + BGP_Personality_Kernel_t; + + +// Defaults for DDR Config +#define BGP_PERS_DDR_PBX0_DEFAULT (0x411D1512) // PBX DCRs setting (in IBM bit numbering) +#define BGP_PERS_DDR_PBX1_DEFAULT (0x40000000) // PBX DCRs setting (in IBM bit numbering) +#define BGP_PERS_DDR_MemConfig0_DEFAULT (0x81fc4080) // MemConfig +#define BGP_PERS_DDR_MemConfig1_DEFAULT (0x0C0ff800) // MemConfig +#define BGP_PERS_DDR_ParmCtl0_DEFAULT (0x3216c008) // Parm Control +#define BGP_PERS_DDR_ParmCtl1_DEFAULT (0x4168c323) // Parm Control +#define BGP_PERS_DDR_MiscCtl0_DEFAULT (0) // Misc. Control +#define BGP_PERS_DDR_MiscCtl1_DEFAULT (0) // Misc. 
Control +#define BGP_PERS_DDR_CmdBufMode0_DEFAULT (0x00400fdf) // Command Buffer Mode +#define BGP_PERS_DDR_CmdBufMode1_DEFAULT (0xffc80600) // Command Buffer Mode +#define BGP_PERS_DDR_RefrInterval0_DEFAULT (0xD1000002) // Refresh Interval +#define BGP_PERS_DDR_RefrInterval1_DEFAULT (0x04000000) // Refresh Interval +#define BGP_PERS_DDR_ODTCtl0_DEFAULT (0) // ODT Control +#define BGP_PERS_DDR_ODTCtl1_DEFAULT (0) // ODT Control +#define BGP_PERS_DDR_DataStrobeCalib0_DEFAULT (0x08028a64) // Data Strobe Calibration +#define BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805) // Data Strobe Calibration +#define BGP_PERS_DDR_DQSCtl_DEFAULT (0x00000168) // DQS Control +#define BGP_PERS_DDR_Throttle_DEFAULT (0) // DDR Throttle +//1#define BGP_PERS_DDR_DDRSizeMB_DEFAULT (4096) // Total DDR size in MegaBytes (512MB - 16384MB). +#define BGP_PERS_DDR_DDRSizeMB_DEFAULT (1024) // Total DDR size in MegaBytes (512MB - 16384MB). +//1#define BGP_PERS_DDR_Chips_DEFAULT (0x0B) // Type of DDR chips +#define BGP_PERS_DDR_Chips_DEFAULT (0x09) // Type of DDR chips +#define BGP_PERS_DDR_CAS_DEFAULT (4) // CAS Latency (3, 4, or 5) + + +#define BGP_PERS_DDRFLAGS_ENABLE_Scrub _BN(0) // Enable DDR Slow Scrub when 1 + +// DDRFLAGS default: Enable Slow Scrub. +#define BGP_PERS_DDRFLAGS_DEFAULT (BGP_PERS_DDRFLAGS_ENABLE_Scrub) + +#define BGP_PERS_SRBS0_DEFAULT (0) +#define BGP_PERS_SRBS1_DEFAULT (0) + +typedef struct BGP_Personality_DDR_t + { + uint32_t DDRFlags; // Misc. Flags and Settings + uint32_t SRBS0; // Controller 0 SRBS/CK Settings + uint32_t SRBS1; // Controller 1 SRBS/CK Settings + uint32_t PBX0; // PBX DCRs setting (in IBM bit numbering) + uint32_t PBX1; // PBX DCRs setting (in IBM bit numbering) + uint32_t MemConfig0; // MemConfig + uint32_t MemConfig1; // MemConfig + uint32_t ParmCtl0; // Parm Control + uint32_t ParmCtl1; // Parm Control + uint32_t MiscCtl0; // Misc. Control + uint32_t MiscCtl1; // Misc. 
Control + uint32_t CmdBufMode0; // Command Buffer Mode + uint32_t CmdBufMode1; // Command Buffer Mode + uint32_t RefrInterval0; // Refresh Interval + uint32_t RefrInterval1; // Refresh Interval + uint32_t ODTCtl0; // ODT Control + uint32_t ODTCtl1; // ODT Control + uint32_t DataStrobeCalib0; // Data Strobe Calibration + uint32_t DataStrobeCalib1; // Data Strobe Calibration + uint32_t DQSCtl; // DQS Control + uint32_t Throttle; // DDR Throttle + uint16_t DDRSizeMB; // Total DDR size in MegaBytes (512MB - 16384MB). + uint8_t Chips; // Type of DDR chips + uint8_t CAS; // CAS Latency (3, 4, or 5) + } + BGP_Personality_DDR_t; + + +typedef struct BGP_Personality_Networks_t + { + uint32_t BlockID; // a.k.a. PartitionID + + uint8_t Xnodes, + Ynodes, + Znodes, + Xcoord, + Ycoord, + Zcoord; + + // PSet Support + uint16_t PSetNum; + uint32_t PSetSize; + uint32_t RankInPSet; + + uint32_t IOnodes; + uint32_t Rank; // Rank in Block (or Partition) + uint32_t IOnodeRank; // Rank (and therefore P2P Addr) of my I/O Node + uint16_t TreeRoutes[ 16 ]; + } + BGP_Personality_Networks_t; + + +typedef struct BGP_IP_Addr_t + { + // IPv6 Addresses are 16 bytes, where the + // lower 4 (indices 12-15) can be used for IPv4 address. 
+ uint8_t octet[ 16 ]; + } + BGP_IP_Addr_t; + + +typedef struct BGP_Personality_Ethernet_t + { + uint16_t MTU; // Initial emac MTU size + uint8_t EmacID[6]; // MAC address for emac + BGP_IP_Addr_t IPAddress; // IPv6/IPv4 address of this node + BGP_IP_Addr_t IPNetmask; // IPv6/IPv4 netmask + BGP_IP_Addr_t IPBroadcast; // IPv6/IPv4 broadcast address + BGP_IP_Addr_t IPGateway; // IPv6/IPv4 initial gateway (zero if none) + BGP_IP_Addr_t NFSServer; // IPv6/IPv4 NFS system software server address + BGP_IP_Addr_t serviceNode; // IPv6/IPv4 address of service node + + // NFS mount info + char NFSExportDir[BGP_PERSONALITY_LEN_NFSDIR]; + char NFSMountDir[BGP_PERSONALITY_LEN_NFSDIR]; + + // Security Key for Service Node authentication + uint8_t SecurityKey[BGP_PERSONALITY_LEN_SECKEY ]; + } + BGP_Personality_Ethernet_t; + + + +#define BGP_PERS_BLKCFG_IPOverCollective _BN(31) +#define BGP_PERS_BLKCFG_IPOverTorus _BN(30) +#define BGP_PERS_BLKCFG_IPOverCollectiveVC _BN(29) +#define BGP_PERS_BLKCFG_CIOModeSel(x) _B2(28,x) +#define BGP_PERS_BLKCFG_bgsysFSSel(x) _B3(26,x) +#define BGP_PERS_BLKCFG_CIOMode_Full 0 +#define BGP_PERS_BLKCFG_CIOMode_MuxOnly 1 +#define BGP_PERS_BLKCFG_CIOMode_None 2 +#define BGP_PERS_BLKCFG_bgsys_NFSv3 0 +#define BGP_PERS_BLKCFG_bgsys_NFSv4 1 +#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \ + BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3)) + +typedef struct TBGP_Personality_t + { + uint16_t CRC; + uint8_t Version; + uint8_t PersonalitySizeWords; + + BGP_Personality_Kernel_t Kernel_Config; + + BGP_Personality_DDR_t DDR_Config; + + BGP_Personality_Networks_t Network_Config; + + BGP_Personality_Ethernet_t Ethernet_Config; + + uint8_t Block_Config; + uint8_t padd[7]; // Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr) + // to simplify jtag operations. See issue #140. + } + BGP_Personality_t; + + +// Define a static initializer for default configuration. 
(DEFAULTS FOR SIMULATION) +// This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c +#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \ + 0, /* CRC */ \ + BGP_PERSONALITY_VERSION, /* Version */ \ + (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \ + { /* BGP_Personality_Kernel_t: */ \ + 0, /* MachineLocation */ \ + BGP_DEFAULT_FREQ, /* FreqMHz */ \ + BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \ + BGP_PERS_PROCESSCONFIG_DEFAULT, /* ProcessConfig */ \ + BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \ + BGP_PERS_NODECONFIG_DEFAULT, /* NodeConfig */ \ + BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \ + BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \ + BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \ + BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \ + 0, /* SharedMemMB */ \ + 0, /* ClockStop0 */ \ + 0 /* ClockStop1 */ \ + }, \ + { /* BGP_Personality_DDR_t: */ \ + BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \ + BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \ + BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \ + BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \ + BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \ + BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \ + BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \ + BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \ + BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \ + BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \ + BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \ + BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \ + BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \ + BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \ + BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \ + BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \ + BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \ + BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \ + BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \ + BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \ + BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \ + 
BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \ + BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \ + BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \ + }, \ + { /* BGP_Personality_Networks_t: */ \ + 0, /* BlockID */ \ + 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \ + 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \ + 0, /* PSetNum */ \ + 0, /* PSetSize */ \ + 0, /* RankInPSet */ \ + 0, /* IOnodes */ \ + 0, /* Rank */ \ + 0, /* IOnodeRank */ \ + { 0, } /* TreeRoutes[ 16 ] */ \ + }, \ + { /* BGP_Personality_Ethernet_t: */ \ + 1536, /* mtu */ \ + { 0, }, /* EmacID[6] */ \ + { { 0x00,0x00,0x00,0x00, /* IPAddress */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0xFF,0xFF,0xFF,0x70 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPGateway */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* NFSServer */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* serviceNode */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + "", /* NFSExportDir[32] */ \ + "", /* NFSMountDir[32] */ \ + { 0x00, } /* SecurityKey[32] */ \ + }, \ + 0, /* Block_Config */ \ + { 0, } /* padd[7] */ \ + } + + +// Define a static initializer for default configuration. 
(DEFAULTS FOR HARDWARE) +// This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c +#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \ + 0, /* CRC */ \ + BGP_PERSONALITY_VERSION, /* Version */ \ + (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \ + { /* BGP_Personality_Kernel_t: */ \ + 0, /* MachineLocation */ \ + BGP_DEFAULT_FREQ, /* FreqMHz */ \ + BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \ + BGP_PERS_PROCESSCONFIG_SMP, /* ProcessConfig */ \ + BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \ + BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE, /* NodeConfig */ \ + BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \ + BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \ + BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \ + BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \ + 0, /* SharedMemMB */ \ + 0, /* ClockStop0 */ \ + 0 /* ClockStop1 */ \ + }, \ + { /* BGP_Personality_DDR_t: */ \ + BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \ + BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \ + BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \ + BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \ + BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \ + BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \ + BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \ + BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \ + BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \ + BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \ + BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \ + BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \ + BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \ + BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \ + BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \ + BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \ + BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \ + BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \ + BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \ + BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \ + BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \ + 
BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \ + BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \ + BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \ + }, \ + { /* BGP_Personality_Networks_t: */ \ + 0, /* BlockID */ \ + 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \ + 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \ + 0, /* PSetNum */ \ + 0, /* PSetSize */ \ + 0, /* RankInPSet */ \ + 0, /* IOnodes */ \ + 0, /* Rank */ \ + 0, /* IOnodeRank */ \ + { 0, } /* TreeRoutes[ 16 ] */ \ + }, \ + { /* BGP_Personality_Ethernet_t: */ \ + 1536, /* mtu */ \ + { 0, }, /* EmacID[6] */ \ + { { 0x00,0x00,0x00,0x00, /* IPAddress */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0xFF,0xFF,0xFF,0x70 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPGateway */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* NFSServer */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* serviceNode */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + "", /* NFSExportDir[32] */ \ + "", /* NFSMountDir[32] */ \ + { 0x00, } /* SecurityKey[32] */ \ + }, \ + 0, /* Block_Config */ \ + { 0, } /* padd[7] */ \ + } + + + + +#endif // Add nothing below this line. diff --git a/arch/powerpc/include/asm/bluegene.h b/arch/powerpc/include/asm/bluegene.h new file mode 100644 index 00000000000000..d7b98e2a27e171 --- /dev/null +++ b/arch/powerpc/include/asm/bluegene.h @@ -0,0 +1,71 @@ +/* + * Blue Gene board definitions + * + * Todd Inglett <tinglett@us.ibm.com> + * + * Copyright 2005, 2007, 2009 International Business Machines, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __ASM_BLUEGENE_H__ +#define __ASM_BLUEGENE_H__ + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +void __init bgp_init_cns(void); +void bgp_udbg_putc(char c); +unsigned int bgp_get_irq(void); +void bgp_send_ipi(int cpu, int msg); +void bgp_init_IPI(int cpu, int msg); +void __init bgp_init_IRQ(void); + +/* Interrupt encoding for Blue Gene/P hardware). + * Given a BIC group and bit index within the group, + * bic_hw_to_irq(group, gint) returns the Linux IRQ number. + */ +static inline unsigned bic_hw_to_irq(unsigned group, unsigned gint) +{ + return ((group+1) << 5) | (gint & 0x1f); +} + + +/* Wrappers for CNS calls. + * Any pointers must point to locations that will not take TLB misses. + */ +int bluegene_testInboxAttention(void); +int bluegene_testForOutboxCompletion(void); +int bluegene_writeRASEvent_nonBlocking(unsigned facility, + unsigned unit, + unsigned short err_code, + unsigned numDetails, + unsigned details[]); +int bluegene_writeRASString(unsigned facility, + unsigned unit, + unsigned short err_code, + char* str); +int bluegene_writeRASString_nonBlocking(unsigned facility, + unsigned unit, + unsigned short err_code, + char* str); +int bluegene_writeToMailboxConsole(char *msg, unsigned msglen); +int bluegene_writeToMailboxConsole_nonBlocking(char *msg, unsigned msglen); +unsigned bluegene_readFromMailboxConsole(char *buf, unsigned bufsize); + +int bluegene_macResetPHY(void); +int bluegene_macTestRxLink(void); +int bluegene_macTestTxLink(void); + +int bluegene_takeCPU(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg)); + +int bluegene_getPersonality(void* buff, unsigned buffSize); + +int bluegene_mapXEMAC(void* baseAddr); + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ 
+#endif diff --git a/arch/powerpc/include/asm/bluegene_ras.h b/arch/powerpc/include/asm/bluegene_ras.h new file mode 100644 index 00000000000000..05757e1b4c9d5c --- /dev/null +++ b/arch/powerpc/include/asm/bluegene_ras.h @@ -0,0 +1,107 @@ +/* + * Andrew Tauferner + * + * Copyright 2006, 2007 International Business Machines + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __BLUEGENE_RAS_H__ +#define __BLUEGENE_RAS_H__ + + +typedef enum { + bg_comp_none = 0x00, + bg_comp_kernel = 0x01, + bg_comp_application = 0x02, + bg_comp_card = 0x03, + bg_comp_mc = 0x04, + bg_comp_mcserver = 0x05, + bg_comp_mmcs = 0x06, + bg_comp_diags = 0x07, + + bg_comp_max // always last +} bg_ras_comp; + + +typedef enum { + bg_subcomp_none = 0x00, + bg_subcomp_ppc450 = 0x01, + bg_subcomp_fpu = 0x02, + bg_subcomp_snoop = 0x03, + bg_subcomp_dp0 = 0x04, + bg_subcomp_dp1 = 0x05, + bg_subcomp_l2 = 0x06, + bg_subcomp_l3 = 0x07, + bg_subcomp_ddr = 0x08, + bg_subcomp_sram = 0x09, + bg_subcomp_dma = 0x0a, + bg_subcomp_testint = 0x0b, + bg_subcomp_testint_dcr = 0x0c, + bg_subcomp_lockbox = 0x0d, + bg_subcomp_plb = 0x0e, + bg_subcomp_collective = 0x0f, + bg_subcomp_torus = 0x10, + bg_subcomp_globint = 0x11, + bg_subcomp_serdes = 0x12, + bg_subcomp_upc = 0x13, + bg_subcomp_dcr = 0x14, + bg_subcomp_bic = 0x15, + bg_subcomp_devbus = 0x16, + bg_subcomp_netbus = 0x17, + bg_subcomp_envmon = 0x18, + bg_subcomp_tomal = 0x19, + bg_subcomp_xemac = 0x1a, + bg_subcomp_phy = 0x1b, + bg_subcomp_bootloader = 0x1c, + bg_subcomp_cnk = 0x1d, + bg_subcomp_ciod = 0x1e, + bg_subcomp_svc_host = 0x1f, + bg_subcomp_diagnostic = 0x20, + bg_subcomp_application = 0x21, + bg_subcomp_linux = 0x22, + bg_subcomp_cns = 0x23, + bg_subcomp_e10000 = 0x24, + + bg_subcomp_max // always last +} bg_ras_subcomp; + + 
+typedef enum { + bg_code_none = 0x00, + bg_code_halted = 0x01, + bg_code_script_error = 0x02, + bg_code_boot_complete = 0x03, + bg_code_panic = 0x04, + bg_code_oops = 0x05, + bg_code_tty_alloc_failure = 0x06, + bg_code_tty_reg_failure = 0x07, + bg_code_mbox_thread_create_failure = 0x08, + bg_code_sysrq_thread_create_failure = 0x09, + bg_code_oom = 0x0a, + bg_ras_max // always last +} bg_ras_code; + + +/* + * bg_ras -- RAS data structure + */ +#define BG_RAS_DATA_MAX 216 +typedef struct { + unsigned short comp; + unsigned short subcomp; + unsigned short code; + unsigned short length; + unsigned char data[BG_RAS_DATA_MAX]; +} bg_ras; + + +#define BG_RAS_FILE "/proc/ras" +#define BG_RAS_ASCII_FILE "/proc/ras_ascii" + + +#endif // __BLUEGENE_RAS_H__ diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h index 27cc6fdcd3b79e..18693a73b0433c 100644 --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -8,6 +8,9 @@ #define PPC44x_MMUCR_TID 0x000000ff #define PPC44x_MMUCR_STS 0x00010000 +#define PPC44x_MMUCR_SWOA 0x01000000 +#define PPC44x_MMUCR_U1TE 0x00400000 +#define PPC44x_MMUCR_U2SWOAE 0x00200000 #define PPC44x_TLB_PAGEID 0 #define PPC44x_TLB_XLAT 1 @@ -32,9 +35,11 @@ /* Storage attribute and access control fields */ #define PPC44x_TLB_ATTR_MASK 0x0000ff80 +#define PPC44x_TLB_WL1 0x00100000 /* Write-through L1 */ #define PPC44x_TLB_U0 0x00008000 /* User 0 */ #define PPC44x_TLB_U1 0x00004000 /* User 1 */ #define PPC44x_TLB_U2 0x00002000 /* User 2 */ +#define PPC44x_TLB_SWOA PPC44x_TLB_U2 /* SWOA when MMUCR U2SWOAE is enabled */ #define PPC44x_TLB_U3 0x00001000 /* User 3 */ #define PPC44x_TLB_W 0x00000800 /* Caching is write-through */ #define PPC44x_TLB_I 0x00000400 /* Caching is inhibited */ @@ -67,8 +72,13 @@ typedef struct { #endif /* !__ASSEMBLY__ */ #ifndef CONFIG_PPC_EARLY_DEBUG_44x +#ifndef CONFIG_BGP #define PPC44x_EARLY_TLBS 1 #else +/* Bluegene maps firmware with an early TLB. 
*/ +#define PPC44x_EARLY_TLBS 2 +#endif +#else #define PPC44x_EARLY_TLBS 2 #define PPC44x_EARLY_DEBUG_VIRTADDR (ASM_CONST(0xf0000000) \ | (ASM_CONST(CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW) & 0xffff)) diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h index 1458d95003814d..9256c1e5713164 100644 --- a/arch/powerpc/include/asm/page_32.h +++ b/arch/powerpc/include/asm/page_32.h @@ -9,7 +9,8 @@ #define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32 -#ifdef CONFIG_NOT_COHERENT_CACHE +/* For BGP, it is convenient for 'kmalloc' to come back with 32-byte aligned units for torus DMA */ +#if defined(CONFIG_NOT_COHERENT_CACHE) || defined(CONFIG_BGP) #define ARCH_KMALLOC_MINALIGN L1_CACHE_BYTES #endif diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 1a0d628eb114bc..471ab83ec18982 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -84,6 +84,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ #define STXVD2X(xs, ra, rb) .long (0x7c000798 | VSX_XX1((xs), (ra), (rb))) #define LXVD2X(xs, ra, rb) .long (0x7c000698 | VSX_XX1((xs), (ra), (rb))) +#define LFPDX(frt,ra,rb) .long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(462<<1) +#define STFPDX(frt,ra,rb) .long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(974<<1) + #define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base) #define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base) #define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base) @@ -93,18 +96,26 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ #define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base) #define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base) -#define SAVE_FPR(n, base) stfd n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base) -#define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base) -#define SAVE_4FPRS(n, base) SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base) -#define SAVE_8FPRS(n, base) SAVE_4FPRS(n, base); 
SAVE_4FPRS(n+4, base) -#define SAVE_16FPRS(n, base) SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base) -#define SAVE_32FPRS(n, base) SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base) -#define REST_FPR(n, base) lfd n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base) -#define REST_2FPRS(n, base) REST_FPR(n, base); REST_FPR(n+1, base) -#define REST_4FPRS(n, base) REST_2FPRS(n, base); REST_2FPRS(n+2, base) -#define REST_8FPRS(n, base) REST_4FPRS(n, base); REST_4FPRS(n+4, base) -#define REST_16FPRS(n, base) REST_8FPRS(n, base); REST_8FPRS(n+8, base) -#define REST_32FPRS(n, base) REST_16FPRS(n, base); REST_16FPRS(n+16, base) +#ifndef CONFIG_BGP +/* Normal FPR save/restore. */ +#define SAVE_FPR(n, b, base) stfd n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base) +#define REST_FPR(n, b, base) lfd n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base) +#else +/* Blue Gene "double-hummer" FPR save/restore. */ +#define SAVE_FPR(n,b,base) li b,THREAD_FPR0+(16*(n)); STFPDX(n,base,b) +#define REST_FPR(n,b,base) li b,THREAD_FPR0+(16*(n)); LFPDX(n,base,b) +#endif + +#define SAVE_2FPRS(n, b, base) SAVE_FPR(n, b, base); SAVE_FPR(n+1, b, base) +#define SAVE_4FPRS(n, b, base) SAVE_2FPRS(n, b, base); SAVE_2FPRS(n+2, b, base) +#define SAVE_8FPRS(n, b, base) SAVE_4FPRS(n, b, base); SAVE_4FPRS(n+4, b, base) +#define SAVE_16FPRS(n, b, base) SAVE_8FPRS(n, b, base); SAVE_8FPRS(n+8, b, base) +#define SAVE_32FPRS(n, b, base) SAVE_16FPRS(n, b, base); SAVE_16FPRS(n+16, b, base) +#define REST_2FPRS(n, b, base) REST_FPR(n, b, base); REST_FPR(n+1, b, base) +#define REST_4FPRS(n, b, base) REST_2FPRS(n, b, base); REST_2FPRS(n+2, b, base) +#define REST_8FPRS(n, b, base) REST_4FPRS(n, b, base); REST_4FPRS(n+4, b, base) +#define REST_16FPRS(n, b, base) REST_8FPRS(n, b, base); REST_8FPRS(n+8, b, base) +#define REST_32FPRS(n, b, base) REST_16FPRS(n, b, base); REST_16FPRS(n+16, b, base) #define SAVE_VR(n,b,base) li b,THREAD_VR0+(16*(n)); stvx n,b,base #define SAVE_2VRS(n,b,base) SAVE_VR(n,b,base); SAVE_VR(n+1,b,base) diff --git 
a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index d3466490104a55..4dcd99a968683b 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -14,8 +14,15 @@ #ifdef CONFIG_VSX #define TS_FPRWIDTH 2 +#define TS_FPRALIGN +#else +#ifdef CONFIG_BGP +#define TS_FPRWIDTH 2 +#define TS_FPRALIGN __attribute__((aligned(16))) #else #define TS_FPRWIDTH 1 +#define TS_FPRALIGN +#endif #endif #ifndef __ASSEMBLY__ @@ -95,8 +102,12 @@ extern struct task_struct *last_task_used_spe; /* This decides where the kernel will search for a free chunk of vm * space during mmap's. */ +#if defined(CONFIG_TASK_UNMAPPED_BASE) +#define TASK_UNMAPPED_BASE (CONFIG_TASK_UNMAPPED_BASE) +#else #define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3) #endif +#endif #ifdef CONFIG_PPC64 /* 64-bit user address space is 44-bits (16TB user VM) */ @@ -166,7 +177,7 @@ struct thread_struct { unsigned long dbcr1; #endif /* FP and VSX 0-31 register set */ - double fpr[32][TS_FPRWIDTH]; + double fpr[32][TS_FPRWIDTH] TS_FPRALIGN; struct { unsigned int pad; @@ -309,7 +320,7 @@ static inline void prefetchw(const void *x) #define spin_lock_prefetch(x) prefetchw(x) -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) || defined(CONFIG_BGP) #define HAVE_ARCH_PICK_MMAP_LAYOUT #endif diff --git a/arch/powerpc/include/bpcore/bgp_dma_memmap.h b/arch/powerpc/include/bpcore/bgp_dma_memmap.h new file mode 100644 index 00000000000000..431bed6512cb82 --- /dev/null +++ b/arch/powerpc/include/bpcore/bgp_dma_memmap.h @@ -0,0 +1,205 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ + + +#ifndef _BGP_DMA_MEMMAP_H_ +#define _BGP_DMA_MEMMAP_H_ + +#define _BGP_DMA_NUM_INJ_FIFO_GROUPS 4 +#define _BGP_DMA_NUM_INJ_FIFOS_PER_GROUP 32 +#define _BGP_DMA_NUM_INJ_FIFOS (_BGP_DMA_NUM_INJ_FIFO_GROUPS * _BGP_DMA_NUM_INJ_FIFOS_PER_GROUP) + +#define _BGP_DMA_NUM_REC_FIFO_GROUPS 4 +#define _BGP_DMA_NUM_REC_FIFOS_PER_GROUP 9 +#define _BGP_DMA_NUM_REC_FIFOS (_BGP_DMA_NUM_REC_FIFO_GROUPS * _BGP_DMA_NUM_REC_FIFOS_PER_GROUP) + +/* size = end - start - BGP_FIFO_SAFETY_MARGIN */ +/* so you can disinguish between full and empty, in 16 byte units */ +#define _BGP_DMA_FIFO_SAFETY_MARGIN 1 +#define _BGP_DMA_QUADS_PER_PACKET 16 + +#define _BGP_DMA_NUM_COUNTER_GROUPS 4 +#define _BGP_DMA_NUM_COUNTERS_PER_GROUP 64 +#define _BGP_DMA_NUM_COUNTERS (_BGP_DMA_NUM_COUNTER_GROUPS * _BGP_DMA_NUM_COUNTERS_PER_GROUP) + +/* these are the lower 12 bits */ +/* #define _BGP_DMA_GROUP_A(g) ((g)*0x1000) */ + +/* ------------------------------------------------ */ +/* Macros defining absolute virtual address */ +/* ------------------------------------------------ */ +#define _BGP_VA_DMA_GROUP_A(g) (_BGP_VA_DMA + ((g)*0x1000)) + +/* offset start of iDMA */ +#define _BGP_VA_iDMA_GROUP_START(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0 ) + +/* repeated 32 times i=0 to 31 */ +#define _BGP_VA_iDMA_START(g,i) (_BGP_VA_DMA_GROUP_A(g) + ((i)*0x0010) ) +#define _BGP_VA_iDMA_END(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0004+(i)*0x0010) ) +#define _BGP_VA_iDMA_HEAD(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0008+(i)*0x0010) ) +#define _BGP_VA_iDMA_TAIL(g,i) 
(_BGP_VA_DMA_GROUP_A(g) + (0x000C+(i)*0x0010) ) +#define _BGP_VA_iDMA_NOT_EMPTY(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0200) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0204) */ +#define _BGP_VA_iDMA_AVAILABLE(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0208) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x020C) */ +#define _BGP_VA_iDMA_THRESHOLD_CROSSED(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0210) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0214) */ +#define _BGP_VA_iDMA_CLEAR_THRESHOLD_CROSSED(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0218) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x021C) */ +#define _BGP_VA_iDMA_ACTIVATED(g) (_BGP_VA_DMA_GROUP_A(g) + 0x220) +#define _BGP_VA_iDMA_ACTIVATE(g) (_BGP_VA_DMA_GROUP_A(g) + 0x224) +#define _BGP_VA_iDMA_DEACTIVATE(g) (_BGP_VA_DMA_GROUP_A(g) + 0x228) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x022C) to ( _BGP_VA_DMA_GROUP_A(g)+0x02FF) */ +/* repeated twice, i=0 to 1 */ +#define _BGP_VA_iDMA_COUNTER_ENABLED(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0300 +(i)*0x0004) ) +#define _BGP_VA_iDMA_COUNTER_ENABLE(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0308 +(i)*0x0004) ) +#define _BGP_VA_iDMA_COUNTER_DISABLE(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0310 +(i)*0x0004) ) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0318) to ( _BGP_VA_DMA_GROUP_A(g)+0x031C) */ +/* repeated twice, i=0 to 1 */ +#define _BGP_VA_iDMA_COUNTER_HIT_ZERO(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0320 +(i)*0x0004) ) +#define _BGP_VA_iDMA_COUNTER_CLEAR_HIT_ZERO(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0328 +(i)*0x0004) ) +#define _BGP_VA_iDMA_COUNTER_GRP_STATUS(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0330) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0334) to ( _BGP_VA_DMA_GROUP_A(g)+0x03FC) */ +/* repeated 64 times i=0 to 63 */ +#define _BGP_VA_iDMA_COUNTER(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0400 +(i)*0x0010) ) +#define _BGP_VA_iDMA_COUNTER_INCREMENT(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0404 +(i)*0x0010) ) +#define _BGP_VA_iDMA_COUNTER_BASE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0408 +(i)*0x0010) ) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x040C) to ( _BGP_VA_DMA_GROUP_A(g)+0x07FC) */ + +/* offset 
start of rDMA */ +#define _BGP_VA_rDMA_GROUP_START(g) ( _BGP_VA_DMA_GROUP_A(g) + 0x0800 ) + +/* repeated 8 times i=0 to 7 */ +#define _BGP_VA_rDMA_START(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0800 + (i)*0x0010) ) +#define _BGP_VA_rDMA_END(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0804 + (i)*0x0010) ) +#define _BGP_VA_rDMA_HEAD(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0808 + (i)*0x0010) ) +#define _BGP_VA_rDMA_TAIL(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x080C + (i)*0x0010) ) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0890) to ( _BGP_VA_DMA_GROUP_A(g)+0x08FC) */ +/* repeated 16 times, 0 to 15 */ +/* below addresses have storage backing them, but are not used by the DMA */ +#define _BGP_NUM_rDMA_UNUSED 16 +#define _BGP_VA_rDMA_UNUSED(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0900 + (i)*0x0004) ) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0940) to ( _BGP_VA_DMA_GROUP_A(g)+0x09FC) */ + +#define _BGP_VA_rDMA_TAIL(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x080C + (i)*0x0010) ) +/* / repeated 2 times i=0 to 1 */ +#define _BGP_VA_rDMA_NOT_EMPTY(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0A00 + (i)*0x0004) ) +#define _BGP_VA_rDMA_AVAILABLE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0A08 + (i)*0x0004) ) +#define _BGP_VA_rDMA_THRESHOLD_CROSSED(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0A10 + (i)*0x0004) ) +#define _BGP_VA_rDMA_CLEAR_THRESHOLD_CROSSED(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0A18 + (i)*0x0004) ) + /* HOLE: ( _BGP_DMA_GROUP_A(g)+0x0A1C) to ( _BGP_VA_DMA_GROUP_A(g)+0x0AFC) */ +/* repeat 2 times, i=0 to 1 */ +#define _BGP_VA_rDMA_COUNTER_ENABLED(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B00 + (i)*0x0004) ) +#define _BGP_VA_rDMA_COUNTER_ENABLE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B08 + (i)*0x0004) ) +#define _BGP_VA_rDMA_COUNTER_DISABLE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B10 + (i)*0x0004) ) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0B18) to ( _BGP_VA_DMA_GROUP_A(g)+0x0B1C) */ +/* repeat 2 times, i=0 to 1 */ +#define _BGP_VA_rDMA_COUNTER_HIT_ZERO(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B20 + (i)*0x0004) ) +#define _BGP_VA_rDMA_COUNTER_CLEAR_HIT_ZERO(g,i) 
( _BGP_VA_DMA_GROUP_A(g) + (0x0B28 + (i)*0x0004) ) +#define _BGP_VA_rDMA_COUNTER_GRP_STATUS(g) ( _BGP_VA_DMA_GROUP_A(g) + 0x0B30) + /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0B34) to ( _BGP_VA_DMA_GROUP_A(g)+0x0BFC) */ +/* repeat 64 times, i=0 to 63 */ +#define _BGP_VA_rDMA_COUNTER(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0C00 + (i)*0x0010) ) +#define _BGP_VA_rDMA_COUNTER_INCREMENT(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0C04 + (i)*0x0010) ) +#define _BGP_VA_rDMA_COUNTER_BASE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0C08 + (i)*0x0010) ) +#define _BGP_VA_rDMA_COUNTER_MAX(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0C0C + (i)*0x0010) ) + + + +/* --------------------------------------- */ +/* Macros defining address offset */ +/* --------------------------------------- */ + + +/* these are the lower 12 bits */ +#define _BGP_DMA_GROUP_A_OFFSET(g) ((g)*0x1000) + +/* ---------------------- */ +/* offset start of iDMA */ +/* ---------------------- */ +#define _BGP_iDMA_GROUP_START_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0 ) + +/* repeated 32 times i=0 to 31 */ +#define _BGP_iDMA_START_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+(i)*0x0010) +#define _BGP_iDMA_END_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0004+(i)*0x0010) +#define _BGP_iDMA_HEAD_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0008+(i)*0x0010) +#define _BGP_iDMA_TAIL_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x000C+(i)*0x0010) +#define _BGP_iDMA_NOT_EMPTY_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0200) + /* HOLE ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0204) */ +#define _BGP_iDMA_AVAILABLE_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0208) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x020C) */ +#define _BGP_iDMA_THRESHOLD_CROSSED_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0210) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0214) */ +#define _BGP_iDMA_CLEAR_THRESHOLD_CROSSED_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0218) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x021C) */ +#define _BGP_iDMA_ACTIVATED_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x220) +#define 
_BGP_iDMA_ACTIVATE_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x224) +#define _BGP_iDMA_DEACTIVATE_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x228) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x022C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x02FF) */ +/* repeated twice, i=0 to 1 */ +#define _BGP_iDMA_COUNTER_ENABLED_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0300 +(i)*0x0004) +#define _BGP_iDMA_COUNTER_ENABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0308 +(i)*0x0004) +#define _BGP_iDMA_COUNTER_DISABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0310 +(i)*0x0004) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0318) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x031C) */ +/* repeated twice, i=0 to 1 */ +#define _BGP_iDMA_COUNTER_HIT_ZERO_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0320 +(i)*0x0004) +#define _BGP_iDMA_COUNTER_CLEAR_HIT_ZERO_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0328 +(i)*0x0004) +#define _BGP_iDMA_COUNTER_GRP_STATUS_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0330) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0334) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x03FC) */ +/* repeated 64 times i=0 to 63 */ +#define _BGP_iDMA_COUNTER_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0400 +(i)*0x0010) +#define _BGP_iDMA_COUNTER_INCREMENT_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0404 +(i)*0x0010) +#define _BGP_iDMA_COUNTER_BASE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0408 +(i)*0x0010) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x040C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x07FC) */ + + +/* ----------------------- */ +/* offset start of rDMA */ +/* ----------------------- */ +#define _BGP_rDMA_GROUP_START_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0800 ) + +/* repeated 8 times i=0 to 7 */ +#define _BGP_rDMA_START_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0800 + (i)*0x0010) +#define _BGP_rDMA_END_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0804 + (i)*0x0010) +#define _BGP_rDMA_HEAD_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0808 + (i)*0x0010) +#define _BGP_rDMA_TAIL_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 
0x080C + (i)*0x0010) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0890) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x09FC) */ +/* / repeated 2 times i=0 to 1 */ +#define _BGP_rDMA_NOT_EMPTY_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A00 + (i)*0x0004) +#define _BGP_rDMA_AVAILABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A08 + (i)*0x0004) +#define _BGP_rDMA_THRESHOLD_CROSSED_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A10 + (i)*0x0004) +#define _BGP_rDMA_CLEAR_THRESHOLD_CROSSED_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A18 + (i)*0x0004) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A1C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0AFC) */ +/* repeat 2 times, i=0 to 1 */ +#define _BGP_rDMA_COUNTER_ENABLED_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B00 + (i)*0x0004) +#define _BGP_rDMA_COUNTER_ENABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B08 + (i)*0x0004) +#define _BGP_rDMA_COUNTER_DISABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B10 + (i)*0x0004) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B18) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B1C) */ +/* repeat 2 times, i=0 to 1 */ +#define _BGP_rDMA_COUNTER_HIT_ZERO_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B20 + (i)*0x0004) +#define _BGP_rDMA_COUNTER_CLEAR_HIT_ZERO_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B28 + (i)*0x0004) +#define _BGP_rDMA_COUNTER_GRP_STATUS_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B30) + /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B34) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0BFC) */ +/* repeat 64 times, i=0 to 63 */ +#define _BGP_rDMA_COUNTER_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C00 + (i)*0x0010) +#define _BGP_rDMA_COUNTER_INCREMENT_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C04 + (i)*0x0010) +#define _BGP_rDMA_COUNTER_BASE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C08 + (i)*0x0010) +#define _BGP_rDMA_COUNTER_MAX_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C0C + (i)*0x0010) + +#endif diff --git a/arch/powerpc/include/bpcore/bgp_types.h b/arch/powerpc/include/bpcore/bgp_types.h new file mode 100644 index 
00000000000000..e298fa764e4987 --- /dev/null +++ b/arch/powerpc/include/bpcore/bgp_types.h @@ -0,0 +1,71 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/** + * \file bpcore/bgp_types.h + */ + +#ifndef _BGP_TYPES_H_ /* Prevent multiple inclusion. */ +#define _BGP_TYPES_H_ + +#include <common/namespace.h> + +__BEGIN_DECLS + + +#if !defined(__ASSEMBLY__) && !defined(__BGP_HIDE_STANDARD_TYPES__) + +#include <common/alignment.h> + +#ifdef _AIX +#include <inttypes.h> +#elif ! defined(__LINUX_KERNEL__) +#include <stdint.h> +#include <sys/types.h> +#else +#include <linux/types.h> +#endif + + +typedef int8_t _bgp_i8_t; +typedef uint8_t _bgp_u8_t; +typedef int16_t _bgp_i16_t; +typedef uint16_t _bgp_u16_t; +typedef int32_t _bgp_i32_t; +typedef uint32_t _bgp_u32_t; +typedef int64_t _bgp_i64_t; +typedef uint64_t _bgp_u64_t; + +typedef union T_BGP_QuadWord + { + uint8_t ub[ 16]; + uint16_t us[ 8]; + uint32_t ul[ 4]; + uint64_t ull[ 2]; + float f[ 4]; + double d[ 2]; + } + ALIGN_QUADWORD _bgp_QuadWord_t; + +typedef _bgp_QuadWord_t _QuadWord_t; + +#endif /* !__ASSEMBLY__ && !__BGP_HIDE_STANDARD_TYPES__ */ + +__END_DECLS + +#endif /* Add nothing below this line. 
*/ diff --git a/arch/powerpc/include/bpcore/ic_memmap.h b/arch/powerpc/include/bpcore/ic_memmap.h new file mode 100644 index 00000000000000..5ff376f4b622f0 --- /dev/null +++ b/arch/powerpc/include/bpcore/ic_memmap.h @@ -0,0 +1,803 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/** + * \file bpcore/ic_memmap.h + */ + + + +/** + * BGP Interrupt Controller Register mapping and bit definition. + * + * Note: preliminary register assignment. + */ + + +/* ************************************************************************* */ +/* Architected BGP Interrupt Controller Registers */ +/* ************************************************************************* */ +/* Authors: Jose R. Brunheroto, Martin Ohmacht */ +/* Reflects the contents of the document http://w3vlsi.watson.ibm.com// */ +/* */ +/* ************************************************************************* */ + + + +/* + + BIC CRIT hierarchy register + +------------------------------------+ + |0 1 2 3 4 5 6 7 8 9 ... 
31 | + +------------------------------------+ + | | | | | | | | | | + | | | | | | | BIC UNIT 6 + | | | | | | | +-----+ + | | | | | | +-------------------------------- |0-31 | - + | | | | | | +-----+ + | | | | | | BIC UNIT 5 + | | | | | | +-----+ + | | | | | +---------------------------------- |0-31 | -. + | | | | | +-----+ + | | | | | BIC GROUP 4 + | | | | | +-----+ + | | | | +------------------------------------ |0-31 | - + | | | | +-----+ + | | | | BIC GROUP3 + | | | | +-----+ + | | | +-------------------------------------- |0-31 | - + | | | +-----+ + | | | BIC GROUP 2 + | | | +-----+ + | | +---------------------------------------- |0-31 | - + | | +-----+ + | | BIC GROUP 1 + | | +-----+ + | +------------------------------------------ |0-31 | - + | +-----+ + | BIC GROUP 0 + | +-----+ + +-------------------------------------------- |0-31 | - + +-----+ + + + BIC NCRIT hierarchy register + +------------------------------------+ + |0 1 2 3 4 5 6 7 8 9 ... 31 | + +------------------------------------+ + | | | | | | | | | | + | | | | | | | BIC UNIT 6 + | | | | | | | +-----+ + | | | | | | +-------------------------------- |0-31 | - + | | | | | | +-----+ + | | | | | | BIC UNIT 5 + | | | | | | +-----+ + | | | | | +---------------------------------- |0-31 | -. + | | | | | +-----+ + | | | | | BIC GROUP 4 + | | | | | +-----+ + | | | | +------------------------------------ |0-31 | - + | | | | +-----+ + | | | | BIC GROUP3 + | | | | +-----+ + | | | +-------------------------------------- |0-31 | - + | | | +-----+ + | | | BIC GROUP 2 + | | | +-----+ + | | +---------------------------------------- |0-31 | - + | | +-----+ + | | BIC GROUP 1 + | | +-----+ + | +------------------------------------------ |0-31 | - + | +-----+ + | BIC GROUP 0 + | +-----+ + +-------------------------------------------- |0-31 | - + +-----+ + + + BIC MCCU hierarchy register + +------------------------------------+ + |0 1 2 3 4 5 6 7 8 9 ... 
31 | + +------------------------------------+ + | | | | | | | | | | + | | | | | | | BIC UNIT 6 + | | | | | | | +-----+ + | | | | | | +-------------------------------- |0-31 | - + | | | | | | +-----+ + | | | | | | BIC UNIT 5 + | | | | | | +-----+ + | | | | | +---------------------------------- |0-31 | -. + | | | | | +-----+ + | | | | | BIC GROUP 4 + | | | | | +-----+ + | | | | +------------------------------------ |0-31 | - + | | | | +-----+ + | | | | BIC GROUP3 + | | | | +-----+ + | | | +-------------------------------------- |0-31 | - + | | | +-----+ + | | | BIC GROUP 2 + | | | +-----+ + | | +---------------------------------------- |0-31 | - + | | +-----+ + | | BIC GROUP 1 + | | +-----+ + | +------------------------------------------ |0-31 | - + | +-----+ + | BIC GROUP 0 + | +-----+ + +-------------------------------------------- |0-31 | - + +-----+ + +*/ + + +#ifndef _IC_MEMMAP_H_ /* Prevent multiple inclusion */ +#define _IC_MEMMAP_H_ + + + +#define _BGP_IC_NUMBER_OF_GROUPS (10) /* number of groups (0..9 inclusive) */ + + + +#define _BGP_IC_TARGET_DISABLED 0x00 /* disabled */ +#define _BGP_IC_TARGET_NCRIT_BCAST 0x01 /* non-critical broadcast */ +#define _BGP_IC_TARGET_CRIT_BCAST 0x02 /* critical broadcast */ +#define _BGP_IC_TARGET_MCHK_BCAST 0x03 /* machine check */ + +#define _BGP_IC_TARGET_NCRIT_CORE0 0x04 /* non-critical core 0 */ +#define _BGP_IC_TARGET_NCRIT_CORE1 0x05 /* non-critical core 1 */ +#define _BGP_IC_TARGET_NCRIT_CORE2 0x06 /* non-critical core 2 */ +#define _BGP_IC_TARGET_NCRIT_CORE3 0x07 /* non-critical core 3 */ + +#define _BGP_IC_TARGET_CRIT_CORE0 0x08 /* critical core 0 */ +#define _BGP_IC_TARGET_CRIT_CORE1 0x09 /* critical core 1 */ +#define _BGP_IC_TARGET_CRIT_CORE2 0x0A /* critical core 2 */ +#define _BGP_IC_TARGET_CRIT_CORE3 0x0B /* critical core 3 */ + +#define _BGP_IC_TARGET_MCHK_CORE0 0x0C /* machine check core 0 */ +#define _BGP_IC_TARGET_MCHK_CORE1 0x0D /* machine check core 1 */ +#define _BGP_IC_TARGET_MCHK_CORE2 0x0E /* machine 
check core 2 */
+#define _BGP_IC_TARGET_MCHK_CORE3 0x0F /* machine check core 3 */
+
+
+/**
+ * _BGP_IC_Group_t - memory-mapped register layout of one BIC interrupt group.
+ *
+ * Each group serves 32 interrupt lines: four views of the status word (plain
+ * read/write, read-and-clear, write-to-clear, write-to-set), four target
+ * selector registers covering IRQ 0:31 (8 IRQs per word, i.e. 4 bits of
+ * _BGP_IC_TARGET_* routing per IRQ), per-core masked-IRQ views for the
+ * non-critical, critical and machine-check targets, and the TI-mchk /
+ * UPC-timestamp / clock-sync-stop mask, who's-on-first and raw-status words.
+ *
+ * NOTE(review): the struct covers 29 words (0x74 bytes) while the group
+ * stride in the memory map is _BGP_IC_MEM_GROUP_SIZE (0x80); the remaining
+ * 0x0C bytes are presumably reserved -- confirm against the BIC spec.
+ */
+typedef struct _BGP_IC_Group_t
+{
+ volatile unsigned int status; /* status (read and write) */
+ volatile unsigned int rd_clr_status; /* status (read and clear) */
+ volatile unsigned int status_clr; /* status (write and clear) */
+ volatile unsigned int status_set; /* status (write and set) */
+
+ volatile unsigned int target_irq0_7; /* target selector (IRQ 0:7) */
+ volatile unsigned int target_irq8_15; /* target selector (IRQ 8:15) */
+ volatile unsigned int target_irq16_23; /* target selector (IRQ 16:23) */
+ volatile unsigned int target_irq24_31; /* target selector (IRQ 24:31) */
+
+ union {
+ volatile unsigned int ncrit_masked_irq[ 4 ]; /* array for easier access */
+ struct {
+ volatile unsigned int ncrit_0_masked_irq; /* non-critical core 0 masked irq (RO) */
+ volatile unsigned int ncrit_1_masked_irq; /* non-critical core 1 masked irq */
+ volatile unsigned int ncrit_2_masked_irq; /* non-critical core 2 masked irq */
+ volatile unsigned int ncrit_3_masked_irq; /* non-critical core 3 masked irq */
+ };
+ };
+
+ union {
+ volatile unsigned int crit_masked_irq[ 4 ]; /* array for easier access */
+ struct {
+ volatile unsigned int crit_0_masked_irq; /* critical core 0 masked irq (RO) */
+ volatile unsigned int crit_1_masked_irq; /* critical core 1 masked irq */
+ volatile unsigned int crit_2_masked_irq; /* critical core 2 masked irq */
+ volatile unsigned int crit_3_masked_irq; /* critical core 3 masked irq */
+ };
+ };
+
+ union {
+ volatile unsigned int mchk_masked_irq[ 4 ]; /* array for easier access */
+ struct {
+ volatile unsigned int mchk_0_masked_irq; /* machine check core 0 masked irq (RO) */
+ volatile unsigned int mchk_1_masked_irq; /* machine check core 1 masked irq */
+ volatile unsigned int mchk_2_masked_irq; /* machine check core 2 masked irq */
+ volatile unsigned int mchk_3_masked_irq; /* machine check core 3 masked irq */
+ };
+ };
+
+ volatile unsigned int ti_mchk_mask; /* (RW) TestInt MachineCheck Mask */
+ volatile unsigned int upc_time_stamp_mask; /* (RW) UPC Time Stamp Mask */
+ volatile unsigned int clock_sync_stop_mask; /* (RW) Clock Sync-Stop Mask */
+
+ volatile unsigned int ti_mchk_wof; /* (RW) TestInt Mchk Who's on First */
+ volatile unsigned int upc_time_stamp_wof; /* (RW) UPC Time Stamp Who's on First */
+ volatile unsigned int clock_sync_stop_wof; /* (RW) Clock Sync-Stop Who's on First */
+
+ volatile unsigned int ti_mchk; /* (RO) TestInt Mchk */
+ volatile unsigned int upc_time_stamp; /* (RO) UPC Time Stamp */
+ volatile unsigned int clock_sync_stop; /* (RO) Clock Sync-Stop */
+
+
+} _BGP_IC_Group_t;
+
+
+
+#define _BGP_IC_MEM_GROUP_SIZE (0x80) /* group size in bytes */
+
+/* macros for indexed access to groups */
+#define _BGP_IC_MEM_GROUP_OFFSET(_grp) ( _BGP_IC_MEM_GROUP0_OFFSET + (_grp)*_BGP_IC_MEM_GROUP_SIZE )
+
+
+/* Defines BGP Interrupt Controller Register Offset (memory mapped access) */
+#define _BGP_IC_MEM_GROUP0_OFFSET (0x0000) /* Group 0 offset */
+#define _BGP_IC_MEM_GROUP1_OFFSET (0x0080) /* Group 1 offset */
+#define _BGP_IC_MEM_GROUP2_OFFSET (0x0100) /* Group 2 offset */
+#define _BGP_IC_MEM_GROUP3_OFFSET (0x0180) /* Group 3 offset */
+#define _BGP_IC_MEM_GROUP4_OFFSET (0x0200) /* Group 4 offset */
+#define _BGP_IC_MEM_GROUP5_OFFSET (0x0280) /* Group 5 offset */
+#define _BGP_IC_MEM_GROUP6_OFFSET (0x0300) /* Group 6 offset */
+#define _BGP_IC_MEM_GROUP7_OFFSET (0x0380) /* Group 7 offset */
+#define _BGP_IC_MEM_GROUP8_OFFSET (0x0400) /* Group 8 offset */
+#define _BGP_IC_MEM_GROUP9_OFFSET (0x0480) /* Group 9 offset */
+
+/* reserved group offset */
+#define _BGP_IC_MEM_GROUP10_OFFSET (0x0500) /* Group 10 offset */
+#define _BGP_IC_MEM_GROUP11_OFFSET (0x0580) /* Group 11 offset */
+#define _BGP_IC_MEM_GROUP12_OFFSET (0x0600) /* Group 12 offset */
+#define _BGP_IC_MEM_GROUP13_OFFSET (0x0680) /* Group 13 offset */
+#define _BGP_IC_MEM_GROUP14_OFFSET (0x0700) /* Group 14 offset */
+
+
+ +/* Hierarchy Registers offsets */ +#define _BGP_IC_MEM_HNCR_OFFSET (0x0780) /* Hierarchy Non-Critical Register */ +#define _BGP_IC_MEM_HNCR0_OFFSET (0x0780) /* Hierarchy Non-Critical Register (core 0) */ +#define _BGP_IC_MEM_HNCR1_OFFSET (0x0784) /* Hierarchy Non-Critical Register (core 1) */ +#define _BGP_IC_MEM_HNCR2_OFFSET (0x0788) /* Hierarchy Non-Critical Register (core 2) */ +#define _BGP_IC_MEM_HNCR3_OFFSET (0x078C) /* Hierarchy Non-Critical Register (core 3) */ + + +#define _BGP_IC_MEM_HCR_OFFSET (0x0790) /* Hierarchy Critical Register */ +#define _BGP_IC_MEM_HCR0_OFFSET (0x0790) /* Hierarchy Critical Register (core 0) */ +#define _BGP_IC_MEM_HCR1_OFFSET (0x0794) /* Hierarchy Critical Register (core 1) */ +#define _BGP_IC_MEM_HCR2_OFFSET (0x0798) /* Hierarchy Critical Register (core 2) */ +#define _BGP_IC_MEM_HCR3_OFFSET (0x079C) /* Hierarchy Critical Register (core 3) */ + + +#define _BGP_IC_MEM_HMCHKR_OFFSET (0x07A0) /* Hierarchy Machine Check Register */ +#define _BGP_IC_MEM_HMCHKR0_OFFSET (0x07A0) /* Hierarchy Machine Check Register (core 0) */ +#define _BGP_IC_MEM_HMCHKR1_OFFSET (0x07A4) /* Hierarchy Machine Check Register (core 1) */ +#define _BGP_IC_MEM_HMCHKR2_OFFSET (0x07A8) /* Hierarchy Machine Check Register (core 2) */ +#define _BGP_IC_MEM_HMCHKR3_OFFSET (0x07AC) /* Hierarchy Machine Check Register (core 3) */ + + +#define _BGP_IC_MEM_HR_TI_MCHECK_OFFSET (0x07B0) /* hierarchy register ti_m_check (RO) */ +#define _BGP_IC_MEM_HR_UPC_TIMESTAMP_OFFSET (0x07B4) /* hierarchy register upc_timestamp_event (RO) */ +#define _BGP_IC_MEM_HR_CI_SYNC_STOP_OFFSET (0x07B8) /* hierarchy register ci_sync_stop (RO) */ + + +#define _BGP_IC_MEM_ERR_RW_OFFSET (0x07C0) /* IC Error Register (RW) */ +#define _BGP_IC_MEM_ERR_RDCLR_OFFSET (0x07C4) /* IC Error Register (RO) (Read Clear all bits) */ +#define _BGP_IC_MEM_ERR_ADDR_OFFSET (0x07C8) /* IC Error Address Register (RO) */ +#define _BGP_IC_MEM_ERR_DATA_OFFSET (0x07CC) /* IC Error Data Register (RO) */ + + 
+#define _BGP_IC_MEM_HR_TI_MCHECK_WOF_OFFSET (0x07D0) /* hierarchy register ti_m_check_WOF (RW) */ +#define _BGP_IC_MEM_HR_UPC_TIMESTAMP_WOF_OFFSET (0x07D4) /* hierarchy register upc_timestamp_event_WOF (RW) */ +#define _BGP_IC_MEM_HR_CI_SYNC_STOP_WOF_OFFSET (0x07D8) /* hierarchy register ci_sync_stop_WOF (RW) */ + + + +/* ************************************************************************* */ +/* definitions for each interrupt generating device */ +/* ************************************************************************* */ + +/* ************************************************************************* */ +/* Core-to-Core Software interrupts: Group 0 bits 00:31 */ +/* ************************************************************************* */ + +#define _BGP_IC_C2C_HIER_POS 0 +#define _BGP_IC_C2C_UNIT_NUM 0 +#define _BGP_IC_C2C_UNIT_POS 0 +#define _BGP_IC_C2C_UNIT_SIZE 32 +#define _BGP_IC_C2C_UNIT_MASK 0xffffffff + +/* ************************************************************************* */ +/* Core-to-Core Software interrupts: Group 0 bits 00:07 (Core 0) */ +/* ************************************************************************* */ + +#define _BGP_IC_C2C_C0_HIER_POS 0 +#define _BGP_IC_C2C_C0_UNIT_NUM 0 +#define _BGP_IC_C2C_C0_UNIT_POS 0 +#define _BGP_IC_C2C_C0_UNIT_SIZE 8 +#define _BGP_IC_C2C_C0_UNIT_MASK 0xff000000 + + +/* ************************************************************************* */ +/* Core-to-Core Software interrupts: Group 0 bits 08:15 (Core 1) */ +/* ************************************************************************* */ + +#define _BGP_IC_C2C_C1_HIER_POS 0 +#define _BGP_IC_C2C_C1_UNIT_NUM 0 +#define _BGP_IC_C2C_C1_UNIT_POS 8 +#define _BGP_IC_C2C_C1_UNIT_SIZE 8 +#define _BGP_IC_C2C_C1_UNIT_MASK 0x00ff0000 + + +/* ************************************************************************* */ +/* Core-to-Core Software interrupts: Group 0 bits 16:23 (Core 2) */ +/* 
************************************************************************* */ + +#define _BGP_IC_C2C_C2_HIER_POS 0 +#define _BGP_IC_C2C_C2_UNIT_NUM 0 +#define _BGP_IC_C2C_C2_UNIT_POS 16 +#define _BGP_IC_C2C_C2_UNIT_SIZE 8 +#define _BGP_IC_C2C_C2_UNIT_MASK 0x0000ff00 + + + +/* ************************************************************************* */ +/* Core-to-Core Software interrupts: Group 0 bits 24:31 (Core 3) */ +/* ************************************************************************* */ + +#define _BGP_IC_C2C_C3_HIER_POS 0 +#define _BGP_IC_C2C_C3_UNIT_NUM 0 +#define _BGP_IC_C2C_C3_UNIT_POS 24 +#define _BGP_IC_C2C_C3_UNIT_SIZE 8 +#define _BGP_IC_C2C_C3_UNIT_MASK 0x000000ff + + + + + +/* ************************************************************************* */ +/* DMA Fatal Interrupt Request: Group 1 bits 00:31 */ +/* ************************************************************************* */ + +#define _BGP_IC_DMA_FT_HIER_POS 1 +#define _BGP_IC_DMA_FT_UNIT_NUM 1 +#define _BGP_IC_DMA_FT_UNIT_POS 0 +#define _BGP_IC_DMA_FT_UNIT_SIZE 32 +#define _BGP_IC_DMA_FT_UNIT_MASK 0xffffffff + +/* ************************************************************************* */ +/* DMA Non-Fatal Interrupt Request: Group 2 bits 00:31 */ +/* ************************************************************************* */ + +#define _BGP_IC_DMA_NFT_G2_HIER_POS 2 +#define _BGP_IC_DMA_NFT_G2_UNIT_NUM 2 +#define _BGP_IC_DMA_NFT_G2_UNIT_POS 0 +#define _BGP_IC_DMA_NFT_G2_UNIT_SIZE 32 +#define _BGP_IC_DMA_NFT_G2_UNIT_MASK 0xffffffff + +/* ************************************************************************* */ +/* DMA Non-Fatal Interrupt Request: Group 3 bits 00:31 */ +/* ************************************************************************* */ + +#define _BGP_IC_DMA_NFT_G3_HIER_POS 3 +#define _BGP_IC_DMA_NFT_G3_UNIT_NUM 3 +#define _BGP_IC_DMA_NFT_G3_UNIT_POS 0 +#define _BGP_IC_DMA_NFT_G3_UNIT_SIZE 32 +#define _BGP_IC_DMA_NFT_G3_UNIT_MASK 0xffffffff + + +/* 
************************************************************************* */ +/* DP0 PU0 Interrupt Request: Group 4 bits 00:02 */ +/* ************************************************************************* */ + +#define _BGP_IC_DP0_PU0_HIER_POS 4 +#define _BGP_IC_DP0_PU0_UNIT_NUM 4 +#define _BGP_IC_DP0_PU0_UNIT_POS 0 +#define _BGP_IC_DP0_PU0_UNIT_SIZE 3 +#define _BGP_IC_DP0_PU0_UNIT_MASK 0xE0000000 + +/* ************************************************************************* */ +/* DP0 PU1 Interrupt Request: Group 4 bits 03:05 */ +/* ************************************************************************* */ + +#define _BGP_IC_DP0_PU1_HIER_POS 4 +#define _BGP_IC_DP0_PU1_UNIT_NUM 4 +#define _BGP_IC_DP0_PU1_UNIT_POS 3 +#define _BGP_IC_DP0_PU1_UNIT_SIZE 3 +#define _BGP_IC_DP0_PU1_UNIT_MASK 0x1C000000 + +/* ************************************************************************* */ +/* DP1 PU0 Interrupt Request: Group 4 bits 06:08 */ +/* ************************************************************************* */ + +#define _BGP_IC_DP1_PU0_HIER_POS 4 +#define _BGP_IC_DP1_PU0_UNIT_NUM 4 +#define _BGP_IC_DP1_PU0_UNIT_POS 6 +#define _BGP_IC_DP1_PU0_UNIT_SIZE 3 +#define _BGP_IC_DP1_PU0_UNIT_MASK 0x03800000 + +/* ************************************************************************* */ +/* DP1 PU1 Interrupt Request: Group 4 bits 09:11 */ +/* ************************************************************************* */ + +#define _BGP_IC_DP1_PU1_HIER_POS 4 +#define _BGP_IC_DP1_PU1_UNIT_NUM 4 +#define _BGP_IC_DP1_PU1_UNIT_POS 9 +#define _BGP_IC_DP1_PU1_UNIT_SIZE 3 +#define _BGP_IC_DP1_PU1_UNIT_MASK 0x00700000 + + +/* ************************************************************************* */ +/* Global Interrupt: Group 4 bits 12:21 */ +/* ************************************************************************* */ + +#define _BGP_IC_GINT_HIER_POS 4 +#define _BGP_IC_GINT_UNIT_NUM 4 +#define _BGP_IC_GINT_UNIT_POS 12 +#define _BGP_IC_GINT_UNIT_SIZE 10 +#define 
_BGP_IC_GINT_UNIT_MASK 0x000FFC00 + + +/* ************************************************************************* */ +/* SRAM Interrupt Request: Group 4 bits 22:24 */ +/* ************************************************************************* */ + +#define _BGP_IC_SRAM_HIER_POS 4 +#define _BGP_IC_SRAM_UNIT_NUM 4 +#define _BGP_IC_SRAM_UNIT_POS 22 +#define _BGP_IC_SRAM_UNIT_SIZE 3 +#define _BGP_IC_SRAM_UNIT_MASK 0x00000380 + + +/* ************************************************************************* */ +/* TI Global Attention Interrupt request: Group 4 bit 25 */ +/* ************************************************************************* */ + +#define _BGP_IC_GLOB_ATT_HIER_POS 4 +#define _BGP_IC_GLOB_ATT_UNIT_NUM 4 +#define _BGP_IC_GLOB_ATT_UNIT_POS 25 +#define _BGP_IC_GLOB_ATT_UNIT_SIZE 1 +#define _BGP_IC_GLOB_ATT_UNIT_MASK 0x00000040 + + +/* ************************************************************************* */ +/* TI LB Scan Attention Interrupt request: Group 4 bit 26 */ +/* ************************************************************************* */ + +#define _BGP_IC_LB_SCATTN_HIER_POS 4 +#define _BGP_IC_LB_SCATTN_UNIT_NUM 4 +#define _BGP_IC_LB_SCATTN_UNIT_POS 26 +#define _BGP_IC_LB_SCATTN_UNIT_SIZE 1 +#define _BGP_IC_LB_SCATTN_UNIT_MASK 0x00000020 + + +/* ************************************************************************* */ +/* TI AB Scan Attention Interrupt request: Group 4 bit 27 */ +/* ************************************************************************* */ + +#define _BGP_IC_AB_SCATTN_HIER_POS 4 +#define _BGP_IC_AB_SCATTN_UNIT_NUM 4 +#define _BGP_IC_AB_SCATTN_UNIT_POS 27 +#define _BGP_IC_AB_SCATTN_UNIT_SIZE 1 +#define _BGP_IC_AB_SCATTN_UNIT_MASK 0x00000010 + + +/* ************************************************************************* */ +/* TI HB Scan Attention Interrupt request: Group 4 bit 28 */ +/* ************************************************************************* */ + +#define _BGP_IC_HB_SCATTN_HIER_POS 4 +#define 
_BGP_IC_HB_SCATTN_UNIT_NUM 4 +#define _BGP_IC_HB_SCATTN_UNIT_POS 28 +#define _BGP_IC_HB_SCATTN_UNIT_SIZE 1 +#define _BGP_IC_HB_SCATTN_UNIT_MASK 0x00000008 + + +/* ************************************************************************* */ +/* TI DCR Read Timeout Interrupt request: Group 4 bit 29 */ +/* ************************************************************************* */ + +#define _BGP_IC_DCR_RD_TO_HIER_POS 4 +#define _BGP_IC_DCR_RD_TO_UNIT_NUM 4 +#define _BGP_IC_DCR_RD_TO_UNIT_POS 29 +#define _BGP_IC_DCR_RD_TO_UNIT_SIZE 1 +#define _BGP_IC_DCR_RD_TO_UNIT_MASK 0x00000004 + + +/* ************************************************************************* */ +/* TI DCR Write Timeout Interrupt request: Group 4 bit 30 */ +/* ************************************************************************* */ + +#define _BGP_IC_DCR_WR_TO_HIER_POS 4 +#define _BGP_IC_DCR_WR_TO_UNIT_NUM 4 +#define _BGP_IC_DCR_WR_TO_UNIT_POS 30 +#define _BGP_IC_DCR_WR_TO_UNIT_SIZE 1 +#define _BGP_IC_DCR_WR_TO_UNIT_MASK 0x00000002 + + + +/* ************************************************************************* */ +/* Collective Non-Critical interrupt: Group 5 bits 00:19 */ +/* ************************************************************************* */ + +#define _BGP_IC_COLNCRIT_HIER_POS 5 +#define _BGP_IC_COLNCRIT_UNIT_NUM 5 +#define _BGP_IC_COLNCRIT_UNIT_POS 0 +#define _BGP_IC_COLNCRIT_UNIT_SIZE 20 +#define _BGP_IC_COLNCRIT_UNIT_MASK 0xFFFFF000 + +/* ************************************************************************* */ +/* Collective Critical interrupt: Group 5 bits 20:23 */ +/* ************************************************************************* */ + +#define _BGP_IC_COLCRIT_HIER_POS 5 +#define _BGP_IC_COLCRIT_UNIT_NUM 5 +#define _BGP_IC_COLCRIT_UNIT_POS 20 +#define _BGP_IC_COLCRIT_UNIT_SIZE 4 +#define _BGP_IC_COLCRIT_UNIT_MASK 0x00000f00 + + +/* ************************************************************************* */ +/* SerDesr machine check: Group 6 bits 0:23 */ +/* 
************************************************************************* */ + +#define _BGP_IC_SERDES_MCK_HIER_POS 6 +#define _BGP_IC_SERDES_MCK_UNIT_NUM 6 +#define _BGP_IC_SERDES_MCK_UNIT_POS 0 +#define _BGP_IC_SERDES_MCK_UNIT_SIZE 24 +#define _BGP_IC_SERDES_MCK_UNIT_MASK 0xFFFFFF00 + + +/* ************************************************************************* */ +/* UPC interrupt request: Group 6 bit 24 */ +/* ************************************************************************* */ + +#define _BGP_IC_UPC_HIER_POS 6 +#define _BGP_IC_UPC_UNIT_NUM 6 +#define _BGP_IC_UPC_UNIT_POS 24 +#define _BGP_IC_UPC_UNIT_SIZE 1 +#define _BGP_IC_UPC_UNIT_MASK 0x00000080 + + +/* ************************************************************************* */ +/* UPC Error interrupt request: Group 6 bit 25 */ +/* ************************************************************************* */ + +#define _BGP_IC_UPCERR_HIER_POS 6 +#define _BGP_IC_UPCERR_UNIT_NUM 6 +#define _BGP_IC_UPCERR_UNIT_POS 25 +#define _BGP_IC_UPCERR_UNIT_SIZE 1 +#define _BGP_IC_UPCERR_UNIT_MASK 0x00000040 + +/* ************************************************************************* */ +/* DCR Bus interrupt request: Group 6 bit 26 */ +/* ************************************************************************* */ + +#define _BGP_IC_DCRBUS_HIER_POS 6 +#define _BGP_IC_DCRBUS_UNIT_NUM 6 +#define _BGP_IC_DCRBUS_UNIT_POS 26 +#define _BGP_IC_DCRBUS_UNIT_SIZE 1 +#define _BGP_IC_DCRBUS_UNIT_MASK 0x00000020 + +/* ************************************************************************* */ +/* BIC machine check: Group 6 bit 27 */ +/* ************************************************************************* */ + +#define _BGP_IC_BIC_MCHK_HIER_POS 6 +#define _BGP_IC_BIC_MCHK_UNIT_NUM 6 +#define _BGP_IC_BIC_MCHK_UNIT_POS 27 +#define _BGP_IC_BIC_MCHK_UNIT_SIZE 1 +#define _BGP_IC_BIC_MCHK_UNIT_MASK 0x00000010 + +/* ************************************************************************* */ +/* BIC interrupt request: Group 
6 bit 28 */ +/* ************************************************************************* */ + +#define _BGP_IC_BIC_IRQ_HIER_POS 6 +#define _BGP_IC_BIC_IRQ_UNIT_NUM 6 +#define _BGP_IC_BIC_IRQ_UNIT_POS 28 +#define _BGP_IC_BIC_IRQ_UNIT_SIZE 1 +#define _BGP_IC_BIC_IRQ_UNIT_MASK 0x00000008 + +/* ************************************************************************* */ +/* DEVBUS interrupt request: Group 6 bit 29 */ +/* ************************************************************************* */ + +#define _BGP_IC_DEVBUS_IRQ_HIER_POS 6 +#define _BGP_IC_DEVBUS_IRQ_UNIT_NUM 6 +#define _BGP_IC_DEVBUS_IRQ_UNIT_POS 29 +#define _BGP_IC_DEVBUS_IRQ_UNIT_SIZE 1 +#define _BGP_IC_DEVBUS_IRQ_UNIT_MASK 0x00000004 + +/* ************************************************************************* */ +/* Clockstop Stopped interrupt request: Group 6 bit 30 */ +/* ************************************************************************* */ + +#define _BGP_IC_CLK_STOP_HIER_POS 6 +#define _BGP_IC_CLK_STOP_UNIT_NUM 6 +#define _BGP_IC_CLK_STOP_UNIT_POS 30 +#define _BGP_IC_CLK_STOP_UNIT_SIZE 1 +#define _BGP_IC_CLK_STOP_UNIT_MASK 0x00000002 + +/* ************************************************************************* */ +/* Environment Monitor interrupt request: Group 6 bit 31 */ +/* ************************************************************************* */ + +#define _BGP_IC_ENV_MON_HIER_POS 6 +#define _BGP_IC_ENV_MON_UNIT_NUM 6 +#define _BGP_IC_ENV_MON_UNIT_POS 31 +#define _BGP_IC_ENV_MON_UNIT_SIZE 1 +#define _BGP_IC_ENV_MON_UNIT_MASK 0x00000001 + + +/* ************************************************************************* */ +/* L30 machine check: Group 7 bits 0:10 */ +/* ************************************************************************* */ + +#define _BGP_IC_L30_MCHK_HIER_POS 7 +#define _BGP_IC_L30_MCHK_UNIT_NUM 7 +#define _BGP_IC_L30_MCHK_UNIT_POS 0 +#define _BGP_IC_L30_MCHK_UNIT_SIZE 11 +#define _BGP_IC_L30_MCHK_UNIT_MASK 0xFFE00000 + +/* 
************************************************************************* */ +/* L30 interrupt request: Group 7 bits 11 */ +/* ************************************************************************* */ + +#define _BGP_IC_L30_IRQ_HIER_POS 7 +#define _BGP_IC_L30_IRQ_UNIT_NUM 7 +#define _BGP_IC_L30_IRQ_UNIT_POS 11 +#define _BGP_IC_L30_IRQ_UNIT_SIZE 1 +#define _BGP_IC_L30_IRQ_UNIT_MASK 0x00100000 + +/* ************************************************************************* */ +/* L31 machine check: Group 7 bits 12:22 */ +/* ************************************************************************* */ + +#define _BGP_IC_L31_MCHK_HIER_POS 7 +#define _BGP_IC_L31_MCHK_UNIT_NUM 7 +#define _BGP_IC_L31_MCHK_UNIT_POS 12 +#define _BGP_IC_L31_MCHK_UNIT_SIZE 11 +#define _BGP_IC_L31_MCHK_UNIT_MASK 0x000FFE00 + +/* ************************************************************************* */ +/* L31 interrupt request: Group 7 bits 23 */ +/* ************************************************************************* */ + +#define _BGP_IC_L31_IRQ_HIER_POS 7 +#define _BGP_IC_L31_IRQ_UNIT_NUM 7 +#define _BGP_IC_L31_IRQ_UNIT_POS 23 +#define _BGP_IC_L31_IRQ_UNIT_SIZE 1 +#define _BGP_IC_L31_IRQ_UNIT_MASK 0x00000100 + + +/* ************************************************************************* */ +/* DDR 0 Recoverable error: Group 7 bit 24 */ +/* ************************************************************************* */ + +#define _BGP_IC_DDR0_RERR_HIER_POS 7 +#define _BGP_IC_DDR0_RERR_UNIT_NUM 7 +#define _BGP_IC_DDR0_RERR_UNIT_POS 24 +#define _BGP_IC_DDR0_RERR_UNIT_SIZE 1 +#define _BGP_IC_DDR0_RERR_UNIT_MASK 0x00000080 + +/* ************************************************************************* */ +/* DDR 0 Special Attention: Group 7 bit 25 */ +/* ************************************************************************* */ + +#define _BGP_IC_DDR0_SATT_HIER_POS 7 +#define _BGP_IC_DDR0_SATT_UNIT_NUM 7 +#define _BGP_IC_DDR0_SATT_UNIT_POS 25 +#define _BGP_IC_DDR0_SATT_UNIT_SIZE 1 
+#define _BGP_IC_DDR0_SATT_UNIT_MASK 0x00000040 + +/* ************************************************************************* */ +/* DDR 0 Machine Check: Group 7 bit 26 */ +/* ************************************************************************* */ + +#define _BGP_IC_DDR0_MCHK_HIER_POS 7 +#define _BGP_IC_DDR0_MCHK_UNIT_NUM 7 +#define _BGP_IC_DDR0_MCHK_UNIT_POS 26 +#define _BGP_IC_DDR0_MCHK_UNIT_SIZE 1 +#define _BGP_IC_DDR0_MCHK_UNIT_MASK 0x00000020 + + +/* ************************************************************************* */ +/* DDR 1 Recoverable error: Group 7 bit 27 */ +/* ************************************************************************* */ + +#define _BGP_IC_DDR1_RERR_HIER_POS 7 +#define _BGP_IC_DDR1_RERR_UNIT_NUM 7 +#define _BGP_IC_DDR1_RERR_UNIT_POS 27 +#define _BGP_IC_DDR1_RERR_UNIT_SIZE 1 +#define _BGP_IC_DDR1_RERR_UNIT_MASK 0x00000010 + +/* ************************************************************************* */ +/* DDR 1 Special Attention: Group 7 bit 28 */ +/* ************************************************************************* */ + +#define _BGP_IC_DDR1_SATT_HIER_POS 7 +#define _BGP_IC_DDR1_SATT_UNIT_NUM 7 +#define _BGP_IC_DDR1_SATT_UNIT_POS 28 +#define _BGP_IC_DDR1_SATT_UNIT_SIZE 1 +#define _BGP_IC_DDR1_SATT_UNIT_MASK 0x00000008 + +/* ************************************************************************* */ +/* DDR 1 Machine Check: Group 7 bit 29 */ +/* ************************************************************************* */ + +#define _BGP_IC_DDR1_MCHK_HIER_POS 7 +#define _BGP_IC_DDR1_MCHK_UNIT_NUM 7 +#define _BGP_IC_DDR1_MCHK_UNIT_POS 29 +#define _BGP_IC_DDR1_MCHK_UNIT_SIZE 1 +#define _BGP_IC_DDR1_MCHK_UNIT_MASK 0x00000004 + + +/* ************************************************************************* */ +/* Test Interface interrupt request: Group 7 bit 30:31 */ +/* ************************************************************************* */ + +#define _BGP_IC_TESTINT_HIER_POS 7 +#define _BGP_IC_TESTINT_UNIT_NUM 
7 +#define _BGP_IC_TESTINT_UNIT_POS 30 +#define _BGP_IC_TESTINT_UNIT_SIZE 2 +#define _BGP_IC_TESTINT_UNIT_MASK 0x00000003 + + +/* ************************************************************************* */ +/* Ethernet TOMAL interrupt request: Group 8 bits 0:1 */ +/* ************************************************************************* */ + +#define _BGP_IC_TOMAL_HIER_POS 8 +#define _BGP_IC_TOMAL_UNIT_NUM 8 +#define _BGP_IC_TOMAL_UNIT_POS 0 +#define _BGP_IC_TOMAL_UNIT_SIZE 2 +#define _BGP_IC_TOMAL_UNIT_MASK 0xC0000000 + + + +/* ************************************************************************* */ +/* Ethernet XEMAC interrupt request: Group 9 bits 0 */ +/* ************************************************************************* */ + +#define _BGP_IC_XEMAC_HIER_POS 9 +#define _BGP_IC_XEMAC_UNIT_NUM 9 +#define _BGP_IC_XEMAC_UNIT_POS 0 +#define _BGP_IC_XEMAC_UNIT_SIZE 1 +#define _BGP_IC_XEMAC_UNIT_MASK 0x80000000 + +/* ************************************************************************* */ +/* Ethernet interrupt request: Group 9 bits 1 */ +/* ************************************************************************* */ + +#define _BGP_IC_ETH_HIER_POS 9 +#define _BGP_IC_ETH_UNIT_NUM 9 +#define _BGP_IC_ETH_UNIT_POS 1 +#define _BGP_IC_ETH_UNIT_SIZE 1 +#define _BGP_IC_ETH_UNIT_MASK 0x40000000 + +/* ************************************************************************* */ +/* Ethernet XENPAK interrupt request: Group 9 bits 2 */ +/* ************************************************************************* */ + +#define _BGP_IC_XENPAK_HIER_POS 9 +#define _BGP_IC_XENPAK_UNIT_NUM 9 +#define _BGP_IC_XENPAK_UNIT_POS 2 +#define _BGP_IC_XENPAK_UNIT_SIZE 1 +#define _BGP_IC_XENPAK_UNIT_MASK 0x20000000 + + + + +#endif diff --git a/arch/powerpc/include/common/alignment.h b/arch/powerpc/include/common/alignment.h new file mode 100644 index 00000000000000..10bfd376734dfa --- /dev/null +++ b/arch/powerpc/include/common/alignment.h @@ -0,0 +1,66 @@ 
+/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/** + * \file common/alignment.h + */ + +#ifndef _ALIGNMENT_H_ /* Prevent multiple inclusion */ +#define _ALIGNMENT_H_ + + + +#include <common/namespace.h> + +__BEGIN_DECLS + +#if defined(__ASSEMBLY__) + +#define ALIGN_L1_DIRTYBIT 3 +#define ALIGN_QUADWORD 4 +#define ALIGN_L1_CACHE 5 +#define ALIGN_L1I_CACHE 5 +#define ALIGN_L1D_CACHE 5 +#define ALIGN_L3_CACHE 7 + +#elif defined(__GNUC__) || defined(__xlC__) + +#define ALIGN_L1_DIRTYBIT __attribute__ ((aligned ( 8))) +#define ALIGN_QUADWORD __attribute__ ((aligned ( 16))) +#define ALIGN_L1_CACHE __attribute__ ((aligned ( 32))) +#define ALIGN_L1I_CACHE __attribute__ ((aligned ( 32))) +#define ALIGN_L1D_CACHE __attribute__ ((aligned ( 32))) +#define ALIGN_L3_CACHE __attribute__ ((aligned (128))) + +#else + +#warning "Need alignment directives for your compiler!" 
+ +#define ALIGN_QUADWORD +#define ALIGN_L1_CACHE +#define ALIGN_L1I_CACHE +#define ALIGN_L1D_CACHE +#define ALIGN_L3_CACHE + +#endif /* __ASSEMBLY__ */ + +__END_DECLS + + + +#endif /* Add nothing below this line */ diff --git a/arch/powerpc/include/common/bgp_bitnumbers.h b/arch/powerpc/include/common/bgp_bitnumbers.h new file mode 100644 index 00000000000000..8a0b2bce175129 --- /dev/null +++ b/arch/powerpc/include/common/bgp_bitnumbers.h @@ -0,0 +1,113 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + ********************************************************************/ +/** + * \file common/bgp_bitnumbers.h + */ + +#ifndef _BGL_BITNUMBERS_H_ /* Prevent multiple inclusion */ +#define _BGL_BITNUMBERS_H_ + +#include <common/namespace.h> + +__BEGIN_DECLS + +/* These defines allows use of IBM's bit numberings (MSb=0, LSb=31)for multi-bit fields */ +/* b = IBM bit number of the least significant bit (highest number) */ +/* x = value to set in field */ +/* s = size */ +#define _BS(b,x,s)( ( ( x) & ( 0x7FFFFFFF>> ( 31- ( s)))) << ( 31- ( b))) +#define _BG(b,x,s)( ( _BS(b,0x7FFFFFFF,s) & x ) >> (31-b) ) +#define _BS64(b,x,s)( ( ( x) & ( 0x7FFFFFFFFFFFFFFFLL>> ( 63- ( s)))) << ( 63- ( b))) +#define _BG64(b,x,s)( ( _BS64(b, 0x7FFFFFFFFFFFFFFFLL,s) & x ) >> (63-b) ) +#define _BN(b) ((1<<(31-(b)))) +#define _B1(b,x) (((x)&0x1)<<(31-(b))) +#define _B2(b,x) (((x)&0x3)<<(31-(b))) +#define _B3(b,x) (((x)&0x7)<<(31-(b))) +#define _B4(b,x) (((x)&0xF)<<(31-(b))) +#define _B5(b,x) (((x)&0x1F)<<(31-(b))) +#define _B6(b,x) (((x)&0x3F)<<(31-(b))) +#define _B7(b,x) (((x)&0x7F)<<(31-(b))) +#define _B8(b,x) (((x)&0xFF)<<(31-(b))) +#define _B9(b,x) (((x)&0x1FF)<<(31-(b))) +#define _B10(b,x) (((x)&0x3FF)<<(31-(b))) +#define _B11(b,x) (((x)&0x7FF)<<(31-(b))) +#define _B12(b,x) (((x)&0xFFF)<<(31-(b))) +#define _B13(b,x) (((x)&0x1FFF)<<(31-(b))) +#define _B14(b,x) (((x)&0x3FFF)<<(31-(b))) +#define _B15(b,x) (((x)&0x7FFF)<<(31-(b))) +#define _B16(b,x) (((x)&0xFFFF)<<(31-(b))) +#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b))) +#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b))) +#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b))) +#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b))) +#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b))) +#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b))) +#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b))) +#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b))) +#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b))) +#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b))) +#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b))) +#define 
_B28(b,x) (((x)&0xFFFFFFF)<<(31-(b))) +#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b))) +#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b))) +#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b))) + +#ifndef __ASSEMBLY__ + +/* These defines ease extraction of bitfields. (Not useful in assembler code.) */ +/* x = 32 bit value */ +/* b = IBM bit number of least significant bit of field */ +/* when b is a const, compiler should generate a single rotate-and-mask instruction */ +#define _GN(x,b) (((x)>>(31-(b)))&0x1) +#define _G2(x,b) (((x)>>(31-(b)))&0x3) +#define _G3(x,b) (((x)>>(31-(b)))&0x7) +#define _G4(x,b) (((x)>>(31-(b)))&0xF) +#define _G5(x,b) (((x)>>(31-(b)))&0x1F) +#define _G6(x,b) (((x)>>(31-(b)))&0x3F) +#define _G7(x,b) (((x)>>(31-(b)))&0x7F) +#define _G8(x,b) (((x)>>(31-(b)))&0xFF) +#define _G9(x,b) (((x)>>(31-(b)))&0x1FF) +#define _G10(x,b) (((x)>>(31-(b)))&0x3FF) +#define _G11(x,b) (((x)>>(31-(b)))&0x7FF) +#define _G12(x,b) (((x)>>(31-(b)))&0xFFF) +#define _G13(x,b) (((x)>>(31-(b)))&0x1FFF) +#define _G14(x,b) (((x)>>(31-(b)))&0x3FFF) +#define _G15(x,b) (((x)>>(31-(b)))&0x7FFF) +#define _G16(x,b) (((x)>>(31-(b)))&0xFFFF) +#define _G17(x,b) (((x)>>(31-(b)))&0x1FFFF) +#define _G18(x,b) (((x)>>(31-(b)))&0x3FFFF) +#define _G19(x,b) (((x)>>(31-(b)))&0x7FFFF) +#define _G20(x,b) (((x)>>(31-(b)))&0xFFFFF) +#define _G21(x,b) (((x)>>(31-(b)))&0x1FFFFF) +#define _G22(x,b) (((x)>>(31-(b)))&0x3FFFFF) +#define _G23(x,b) (((x)>>(31-(b)))&0x7FFFFF) +#define _G24(x,b) (((x)>>(31-(b)))&0xFFFFFF) +#define _G25(x,b) (((x)>>(31-(b)))&0x1FFFFFF) +#define _G26(x,b) (((x)>>(31-(b)))&0x3FFFFFF) +#define _G27(x,b) (((x)>>(31-(b)))&0x7FFFFFF) +#define _G28(x,b) (((x)>>(31-(b)))&0xFFFFFFF) +#define _G29(x,b) (((x)>>(31-(b)))&0x1FFFFFFF) +#define _G30(x,b) (((x)>>(31-(b)))&0x3FFFFFFF) +#define _G31(x,b) (((x)>>(31-(b)))&0x7FFFFFFF) + +#endif /* __ASSEMBLY__ */ + +__END_DECLS + +#endif /* Add nothing below this line. 
*/ diff --git a/arch/powerpc/include/common/bgp_chipversion.h b/arch/powerpc/include/common/bgp_chipversion.h new file mode 100644 index 00000000000000..eba213a720a0b3 --- /dev/null +++ b/arch/powerpc/include/common/bgp_chipversion.h @@ -0,0 +1,52 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/** + * \file common/bgp_chipversion.h + */ + +#ifndef _BGP_CHIPVERSION_H_ /* Prevent multiple inclusion */ +#define _BGP_CHIPVERSION_H_ + + + +#include <common/namespace.h> + +__BEGIN_DECLS + +#define BGP_CHIPVERSION_DD2 + +#if defined BGP_CHIPVERSION_DD1 +/* Settings for DD1 */ +#define BGP_DD1_WORKAROUNDS 1 + +#elif defined BGP_CHIPVERSION_DD2 +/* Settings for DD2 */ + +#else +/* */ +#error "Invalid chip version setting" + +#endif + + +__END_DECLS + + + +#endif /* Add nothing below this line. */ diff --git a/arch/powerpc/include/common/bgp_personality.h b/arch/powerpc/include/common/bgp_personality.h new file mode 100644 index 00000000000000..9d64516c8a9ede --- /dev/null +++ b/arch/powerpc/include/common/bgp_personality.h @@ -0,0 +1,786 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/** + * \file common/bgp_personality.h + */ + +#ifndef _BGP_PERSONALITY_H_ /* Prevent multiple inclusion */ +#define _BGP_PERSONALITY_H_ + + + +#include <common/namespace.h> + +__BEGIN_DECLS + +#include <common/bgp_chipversion.h> +#include <common/alignment.h> +#include <common/bgp_bitnumbers.h> +#include <bpcore/bgp_types.h> + +/* */ +/* I/O Node Linux currently hard-codes the personality address. */ +/* */ +#define _BGP_HARDCODED_PERSONALITY_SRAM_ADDRESS (0xFFFFF800) + +#define _BGP_PERSONALITY_VERSION (0x0A) + +#define _BGP_DEFAULT_FREQ (850) /* Match the current DD2 hardware */ + +#define _BGP_PERS_Unused_DEFAULT 0 + +#define _BGP_PERS_PROCESSCONFIG_DIAGS (0xFF000000) /* Diagnostic Mode: All Cores Enabled and Privileged in Process 0 */ +#define _BGP_PERS_PROCESSCONFIG_SMP (0x0F000000) /* All Cores Enabled User-Space in Process 0 */ +#define _BGP_PERS_PROCESSCONFIG_VNM (0x08040201) /* 4 Single-Core Processes (a.k.a. 
Virtual Nodes) */ +#define _BGP_PERS_PROCESSCONFIG_2x2 (0x0C030000) /* 2 Processes of 2 Cores each in same DP unit */ +#define _BGP_PERS_PROCESSCONFIG_DEFAULT (_BGP_PERS_PROCESSCONFIG_DIAGS) +#define _BGP_PERS_PROCESSCONFIG_PRIV_MSK (0xF0F0F0F0) /* Mask to isolate privileged core flags */ + + +/* Personality.Kernel_Config.RASPolicy */ +#define _BGP_PERS_RASPOLICY_VERBOSITY(x) _B2( 1,x) /* Verbosity as shown below */ +#define _BGP_PERS_RASPOLICY_MINIMAL _BGP_PERS_RASPOLICY_VERBOSITY(0) /* Benchmarking Level of Capture and Reporting */ +#define _BGP_PERS_RASPOLICY_NORMAL _BGP_PERS_RASPOLICY_VERBOSITY(1) /* Normal Production Level of Capture and Reporting */ +#define _BGP_PERS_RASPOLICY_VERBOSE _BGP_PERS_RASPOLICY_VERBOSITY(2) /* Manufacturing Test and Diagnostics */ +#define _BGP_PERS_RASPOLICY_EXTREME _BGP_PERS_RASPOLICY_VERBOSITY(3) /* Report Every Event Immediately - Thresholds set to 1 */ +#define _BGP_PERS_RASPOLICY_FATALEXIT _BN( 2) /* Fatal is Fatal, so exit. */ + +#define _BGP_PERS_RASPOLICY_DEFAULT (_BGP_PERS_RASPOLICY_VERBOSE | _BGP_PERS_RASPOLICY_FATALEXIT) + + +#define _BGP_PERSONALITY_LEN_NFSDIR (32) /* 32bytes */ + +#define _BGP_PERSONALITY_LEN_SECKEY (32) /* 32bytes */ + +/* Personality.NodeConfig Driver Enables and Configurations */ +#define _BGP_PERS_ENABLE_Simulation _BN( 0) /* Running on VHDL Simulation */ +#define _BGP_PERS_ENABLE_LockBox _BN( 1) +#define _BGP_PERS_ENABLE_BIC _BN( 2) +#define _BGP_PERS_ENABLE_DDR _BN( 3) /* DDR Controllers (not Fusion DDR model) */ +#define _BGP_PERS_ENABLE_LoopBack _BN( 4) /* LoopBack: Internal TS/TR or SerDes Loopback */ +#define _BGP_PERS_ENABLE_GlobalInts _BN( 5) +#define _BGP_PERS_ENABLE_Collective _BN( 6) /* Enable Collective Network */ +#define _BGP_PERS_ENABLE_Torus _BN( 7) +#define _BGP_PERS_ENABLE_TorusMeshX _BN( 8) /* Torus is a Mesh in the X-dimension */ +#define _BGP_PERS_ENABLE_TorusMeshY _BN( 9) /* Torus is a Mesh in the Y-dimension */ +#define _BGP_PERS_ENABLE_TorusMeshZ _BN(10) /* Torus is a Mesh 
in the Z-dimension */ +#define _BGP_PERS_ENABLE_TreeA _BN(11) /* Enable Collective Network A-link */ +#define _BGP_PERS_ENABLE_TreeB _BN(12) /* Enable Collective Network B-link */ +#define _BGP_PERS_ENABLE_TreeC _BN(13) /* Enable Collective Network C-link */ +#define _BGP_PERS_ENABLE_DMA _BN(14) +#define _BGP_PERS_ENABLE_SerDes _BN(15) +#define _BGP_PERS_ENABLE_UPC _BN(16) +#define _BGP_PERS_ENABLE_EnvMon _BN(17) +#define _BGP_PERS_ENABLE_Ethernet _BN(18) +#define _BGP_PERS_ENABLE_JTagLoader _BN(19) /* Converse with JTag Host to load kernel */ +#define _BGP_PERS_ENABLE_MailBoxReceive _BGP_PERS_ENABLE_JTagLoader +#define _BGP_PERS_ENABLE_PowerSave _BN(20) /* Turn off unused devices (Eth on CN, TS on ION) */ +#define _BGP_PERS_ENABLE_FPU _BN(21) /* Enable Double-Hummers (not supported in EventSim) */ +#define _BGP_PERS_ENABLE_StandAlone _BN(22) /* Disable "CIOD" interface, Requires Collective! */ +#define _BGP_PERS_ENABLE_TLBMisses _BN(23) /* TLB Misses vs Wasting Memory (see bgp_AppSetup.c) */ +#define _BGP_PERS_ENABLE_Mambo _BN(24) /* Running under Mambo? Used by Linux */ +#define _BGP_PERS_ENABLE_TreeBlast _BN(25) /* Enable Tree "Blast" mode */ +#define _BGP_PERS_ENABLE_BlindStacks _BN(26) /* For "XB" Tests, Lock 16K Stacks in Blind Device */ +#define _BGP_PERS_ENABLE_CNK_Malloc _BN(27) /* Enable Malloc Support in CNK. */ +#define _BGP_PERS_ENABLE_Reproducibility _BN(28) /* Enable Cycle Reproducibility */ +#define _BGP_PERS_ENABLE_HighThroughput _BN(29) /* Enable high throughput computing mode */ +#define _BGP_PERS_ENABLE_DiagnosticsMode _BN(30) /* Enable diagnostics mode */ + +/* Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back) */ +/* This overrides most L1, L2, and Snoop settings. Carefull!! 
*/ +#define _BGP_PERS_ENABLE_BGLMODE _BN(31) /* (not yet fully implemented) */ + +/* Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave */ + +#define _BGP_PERS_NODECONFIG_DEFAULT (_BGP_PERS_ENABLE_Simulation |\ + _BGP_PERS_ENABLE_LockBox |\ + _BGP_PERS_ENABLE_BIC |\ + _BGP_PERS_ENABLE_DDR |\ + _BGP_PERS_ENABLE_LoopBack |\ + _BGP_PERS_ENABLE_GlobalInts |\ + _BGP_PERS_ENABLE_Collective |\ + _BGP_PERS_ENABLE_Torus |\ + _BGP_PERS_ENABLE_UPC |\ + _BGP_PERS_ENABLE_EnvMon |\ + _BGP_PERS_ENABLE_FPU |\ + _BGP_PERS_ENABLE_TLBMisses |\ + _BGP_PERS_ENABLE_StandAlone) + +/* Default Setup for Hardware: */ +/* Supports Stand-Alone CNA Applications. */ +/* Bootloader-Extensions and XB's must turn-off JTagLoader */ +#define _BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (_BGP_PERS_ENABLE_JTagLoader |\ + _BGP_PERS_ENABLE_LockBox |\ + _BGP_PERS_ENABLE_BIC |\ + _BGP_PERS_ENABLE_DDR |\ + _BGP_PERS_ENABLE_GlobalInts |\ + _BGP_PERS_ENABLE_Collective |\ + _BGP_PERS_ENABLE_SerDes |\ + _BGP_PERS_ENABLE_UPC |\ + _BGP_PERS_ENABLE_EnvMon |\ + _BGP_PERS_ENABLE_FPU |\ + _BGP_PERS_ENABLE_TLBMisses |\ + _BGP_PERS_ENABLE_StandAlone) + + + +/* these fields are defined by the control system depending on compute/io node */ +/* _BGP_PERS_ENABLE_Torus | */ +/* _BGP_PERS_ENABLE_TorusMeshX | */ +/* _BGP_PERS_ENABLE_TorusMeshY | */ +/* _BGP_PERS_ENABLE_TorusMeshZ | */ + + + +/* Personality.L1Config: Controls and Settings for L1 Cache */ +#define _BGP_PERS_L1CONFIG_L1I _BN( 0) /* L1 Enabled for Instructions */ +#define _BGP_PERS_L1CONFIG_L1D _BN( 1) /* L1 Enabled for Data */ +#define _BGP_PERS_L1CONFIG_L1SWOA _BN( 2) /* L1 Store WithOut Allocate */ +#define _BGP_PERS_L1CONFIG_L1Recovery _BN( 3) /* L1 Full Recovery Mode */ +#define _BGP_PERS_L1CONFIG_L1WriteThru _BN( 4) /* L1 Write-Thru (not svc_host changeable (yet?)) */ +#define _BGP_PERS_L1CONFIG_DO_L1ITrans _BN( 5) /* Enable L1 Instructions Transient? 
*/ +#define _BGP_PERS_L1CONFIG_DO_L1DTrans _BN( 6) /* Enable L1 Data Transient? */ + /* unused 9bits: 7..15 */ +#define _BGP_PERS_L1CONFIG_L1ITrans(x) _B8(23,x) /* L1 Transient for Instructions in Groups of 16 Lines */ +#define _BGP_PERS_L1CONFIG_L1DTrans(x) _B8(31,x) /* L1 Transient for Data in Groups of 16 Lines */ + +#define _BGP_PERS_L1CONFIG_DEFAULT (_BGP_PERS_L1CONFIG_L1I |\ + _BGP_PERS_L1CONFIG_L1D |\ + _BGP_PERS_L1CONFIG_L1SWOA |\ + _BGP_PERS_L1CONFIG_L1Recovery |\ + _BGP_PERS_L1CONFIG_L1WriteThru) + +typedef union T_BGP_Pers_L1Cfg + { + uint32_t l1cfg; + struct { + unsigned l1i : 1; + unsigned l1d : 1; + unsigned l1swoa : 1; + unsigned l1recovery : 1; + unsigned l1writethru : 1; + unsigned do_l1itrans : 1; + unsigned do_l1dtrans : 1; + unsigned l1rsvd : 9; + unsigned l1itrans : 8; + unsigned l1dtrans : 8; + }; + } + _BGP_Pers_L1Cfg; + +/* Personality.L2Config: Controls and Settings for L2 and Snoop */ +#define _BGP_PERS_L2CONFIG_L2I _BN( 0) /* L2 Instruction Caching Enabled */ +#define _BGP_PERS_L2CONFIG_L2D _BN( 1) /* L2 Data Caching Enabled */ +#define _BGP_PERS_L2CONFIG_L2PF _BN( 2) /* L2 Automatic Prefetching Enabled */ +#define _BGP_PERS_L2CONFIG_L2PFO _BN( 3) /* L2 Optimistic Prefetching Enabled */ +#define _BGP_PERS_L2CONFIG_L2PFA _BN( 4) /* L2 Aggressive Prefetching Enabled (fewer deeper streams) */ +#define _BGP_PERS_L2CONFIG_L2PFS _BN( 5) /* L2 Aggressive Many-Stream Prefetching Enabled (deeper only when available buffers) */ +#define _BGP_PERS_L2CONFIG_Snoop _BN( 6) /* Just NULL Snoop Filter */ +#define _BGP_PERS_L2CONFIG_SnoopCache _BN( 7) /* Snoop Caches */ +#define _BGP_PERS_L2CONFIG_SnoopStream _BN( 8) /* Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata) */ +#define _BGP_PERS_L2CONFIG_SnoopRange _BN( 9) /* Snoop Range Filter when possible */ +#define _BGP_PERS_L2CONFIG_BUG824LUMPY _BN(10) /* BPC_BUGS 824: Fix with Lumpy Performance */ +#define _BGP_PERS_L2CONFIG_BUG824SMOOTH _BN(11) /* BPC_BUGS 824: Fix with Smooth 
Performance, but -12% Memory */ +#define _BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12) /* Special for Snoop diagnostics. See bgp_vmm.c */ + /* additional bits may be used for Snoop setting tweaks */ + +/* Default L2 Configuration: */ +/* L2 Enabled with Multi-Stream Aggressive Prefetching */ +/* Snoop Enabled with all filters except Range */ +#define _BGP_PERS_L2CONFIG_DEFAULT (_BGP_PERS_L2CONFIG_L2I |\ + _BGP_PERS_L2CONFIG_L2D |\ + _BGP_PERS_L2CONFIG_L2PF |\ + _BGP_PERS_L2CONFIG_L2PFO |\ + _BGP_PERS_L2CONFIG_L2PFS |\ + _BGP_PERS_L2CONFIG_Snoop |\ + _BGP_PERS_L2CONFIG_SnoopCache |\ + _BGP_PERS_L2CONFIG_SnoopStream) + +/* Personality.L3Config: Controls and Settings for L3 */ +/* Note: Most bits match _BGP_L3x_CTRL DCRs. */ +/* See arch/include/bpcore/bgl_l3_dcr.h */ +#define _BGP_PERS_L3CONFIG_L3I _BN( 0) /* L3 Enabled for Instructions */ +#define _BGP_PERS_L3CONFIG_L3D _BN( 1) /* L3 Enabled for Data */ +#define _BGP_PERS_L3CONFIG_L3PFI _BN( 2) /* Inhibit L3 Prefetch from DDR */ +#define _BGP_PERS_L3CONFIG_DO_Scratch _BN( 3) /* Set up Scratch? */ +#define _BGP_PERS_L3CONFIG_DO_PFD0 _BN( 4) /* Adjust PFD0? */ +#define _BGP_PERS_L3CONFIG_DO_PFD1 _BN( 5) /* Adjust PFD1? */ +#define _BGP_PERS_L3CONFIG_DO_PFDMA _BN( 6) /* Adjust PFDMA? */ +#define _BGP_PERS_L3CONFIG_DO_PFQD _BN( 7) /* Adjust PFQD? 
*/ + /* 8..15 unused/available */ +#define _BGP_PERS_L3CONFIG_Scratch(x) _B4(19,x) /* Scratch 8ths: 0..8 */ +#define _BGP_PERS_L3CONFIG_PFD0(x) _B3(22,x) /* Prefetch Depth for DP0 */ +#define _BGP_PERS_L3CONFIG_PFD1(x) _B3(25,x) /* Prefetch Depth for DP1 */ +#define _BGP_PERS_L3CONFIG_PFDMA(x) _B3(28,x) /* Prefetch Depth for DMA */ +#define _BGP_PERS_L3CONFIG_PFQD(x) _B3(31,x) /* Prefetch Queue Depth */ + +/* General L3 Configuration */ +typedef union T_BGP_Pers_L3Cfg + { + uint32_t l3cfg; + struct { + unsigned l3i : 1; + unsigned l3d : 1; + unsigned l3pfi : 1; + unsigned do_scratch : 1; + unsigned do_pfd0 : 1; + unsigned do_pfd1 : 1; + unsigned do_pfdma : 1; + unsigned do_pfqd : 1; + unsigned rsvd : 8; + unsigned scratch : 4; + unsigned pfd0 : 3; + unsigned pfd1 : 3; + unsigned pfdma : 3; + unsigned pfqd : 3; + }; + } + _BGP_Pers_L3Cfg; + +/* Default L3 Configuration: */ +/* L3 Enabled for Instructions and Data */ +/* No Prefetch Depth overrides, No Scratch, No Scrambling. */ +#define _BGP_PERS_L3CONFIG_DEFAULT (_BGP_PERS_L3CONFIG_L3I |\ + _BGP_PERS_L3CONFIG_L3D |\ + _BGP_PERS_L3CONFIG_DO_PFDMA |\ + _BGP_PERS_L3CONFIG_PFDMA(4)) + + +/* L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users) */ +#define _BGP_PERS_L3SELECT_DO_CacheSel _BN( 0) /* Adjust Cache Select setting? */ +#define _BGP_PERS_L3SELECT_DO_BankSel _BN( 1) /* Adjust Bank Select setting? */ +#define _BGP_PERS_L3SELECT_Scramble _BN( 2) /* L3 Scramble */ +#define _BGP_PERS_L3SELECT_PFby2 _BN( 3) /* Prefetch by 2 if set, else by 1 (default) if clear. */ +#define _BGP_PERS_L3SELECT_CacheSel(x) _B5( 8,x) /* PhysAddr Bit for L3 Selection (0..26) */ +#define _BGP_PERS_L3SELECT_BankSel(x) _B5(13,x) /* PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel. */ + +typedef union T_BGP_Pers_L3Select + { + uint32_t l3select; + struct { + unsigned do_CacheSel : 1; + unsigned do_BankSel : 1; + unsigned l3Scramble : 1; + unsigned l3_PF_by2 : 1; /* default is PreFetch by 1. 
*/ + unsigned CacheSel : 5; /* Physical Address Bit for L3 Selection (0..26) */ + unsigned BankSel : 5; /* 0..26 Must be strictly greater than CacheSel. */ + unsigned rsvd : 18; + }; + } + _BGP_Pers_L3Select; + +/* Default L3 Selection Configuration: Disable overrides, but set h/w default values. */ +#define _BGP_PERS_L3SELECT_DEFAULT (_BGP_PERS_L3SELECT_CacheSel(21) |\ + _BGP_PERS_L3SELECT_BankSel(26)) + +/* Tracing Masks and default trace configuration */ +/* See also arch/include/cnk/Trace.h */ +#define _BGP_TRACE_CONFIG _BN( 0) /* Display Encoded personality config on startup */ +#define _BGP_TRACE_ENTRY _BN( 1) /* Function enter and exit */ +#define _BGP_TRACE_INTS _BN( 2) /* Standard Interrupt Dispatch */ +#define _BGP_TRACE_CINTS _BN( 3) /* Critical Interrupt Dispatch */ +#define _BGP_TRACE_MCHK _BN( 4) /* Machine Check Dispatch */ +#define _BGP_TRACE_SYSCALL _BN( 5) /* System Calls */ +#define _BGP_TRACE_VMM _BN( 6) /* Virtual Memory Manager */ +#define _BGP_TRACE_DEBUG _BN( 7) /* Debug Events (app crashes etc) */ +#define _BGP_TRACE_TORUS _BN( 8) /* Torus Init */ +#define _BGP_TRACE_TREE _BN( 9) /* Tree Init */ +#define _BGP_TRACE_GLOBINT _BN(10) /* Global Interrupts */ +#define _BGP_TRACE_DMA _BN(11) /* DMA Setup */ +#define _BGP_TRACE_SERDES _BN(12) /* SerDes Init */ +#define _BGP_TRACE_TESTINT _BN(13) /* Test Interface, ECID, Config */ +#define _BGP_TRACE_ETHTX _BN(14) /* Ethernet Transmit */ +#define _BGP_TRACE_ETHRX _BN(15) /* Ethernet Receive */ +#define _BGP_TRACE_POWER _BN(16) /* Power Control */ +#define _BGP_TRACE_PROCESS _BN(17) /* Process/Thread Mapping */ +#define _BGP_TRACE_EXIT_SUM _BN(18) /* Report Per-Core Interrupt and Error Summary on exit() */ +#define _BGP_TRACE_SCHED _BN(19) /* Report Scheduler Information */ +#define _BGP_TRACE_RAS _BN(20) /* Report RAS Events (in addition to sending to Host) */ +#define _BGP_TRACE_ECID _BN(21) /* Report UCI and ECID on boot */ +#define _BGP_TRACE_FUTEX _BN(22) /* Trace Futex operations */ +#define 
_BGP_TRACE_MemAlloc _BN(23) /* Trace MMAP and Shared Memory operations */ +#define _BGP_TRACE_CONTROL _BN(24) /* Trace control messages exchanged with I/O node */ +#define _BGP_TRACE_MSGS _BN(25) /* Trace messages and packets sent on virtual channel 0 */ +#define _BGP_TRACE_DEBUGGER _BN(26) /* Trace debugger messages exchanged with I/O node */ +#define _BGP_TRACE_WARNINGS _BN(30) /* Trace Warnings */ +#define _BGP_TRACE_VERBOSE _BN(31) /* Verbose Tracing Modifier */ + +/* Enable tracking of Regression Suite coverage and report UCI+ECID on boot */ +#define _BGP_PERS_TRACE_DEFAULT 0 +/* (_BGP_TRACE_CONFIG | _BGP_TRACE_ECID) */ + + +typedef struct _BGP_Personality_Kernel_t + { + uint32_t UniversalComponentIdentifier; /* see include/common/bgp_ras.h */ + + uint32_t FreqMHz; /* Clock_X1 Frequency in MegaHertz (eg 1000) */ + + uint32_t RASPolicy; /* Verbosity level, and other RAS Reporting Controls */ + + /* Process Config: */ + /* Each byte represents a process (1 to 4 processes supported) */ + /* No core can be assigned to more than 1 process. */ + /* Cores assigned to no process are disabled. */ + /* Cores with in a process share the same address space. */ + /* Separate processes have distinct address spaces. */ + /* Within each process (0 to 4 cores assigned to a process): */ + /* Lower nibble is bitmask of which core belongs to that process. */ + /* Upper nibble is bitmask whether that thread is privileged or user. */ + /* Processes with zero cores do not exist. */ + /* E.g., for Diagnostics, we sometimes use 0xFF000000, which means */ + /* that all 4 cores run privileged in process 0. 
*/ + uint32_t ProcessConfig; + + uint32_t TraceConfig; /* Kernel Tracing Enables */ + uint32_t NodeConfig; /* Kernel Driver Enables */ + uint32_t L1Config; /* L1 Config and setup controls */ + uint32_t L2Config; /* L2 and Snoop Config and setup controls */ + uint32_t L3Config; /* L3 Config and setup controls */ + uint32_t L3Select; /* L3 Cache and Bank Selection controls */ + + uint32_t SharedMemMB; /* Memory to Reserve for Sharing among Processes */ + + uint32_t ClockStop0; /* Upper 11Bits of ClockStop, enabled if Non-zero */ + uint32_t ClockStop1; /* Lower 32Bits of ClockStop, enabled if Non-zero */ + } + _BGP_Personality_Kernel_t; + + +/* Defaults for DDR Config */ +#define _BGP_PERS_DDR_PBX0_DEFAULT (0x411D1512) /* PBX DCRs setting (in IBM bit numbering) */ +#define _BGP_PERS_DDR_PBX1_DEFAULT (0x40000000) /* PBX DCRs setting (in IBM bit numbering) */ +#define _BGP_PERS_DDR_MemConfig0_DEFAULT (0x81fc4080) /* MemConfig */ +#define _BGP_PERS_DDR_MemConfig1_DEFAULT (0x0C0ff800) /* MemConfig */ +#define _BGP_PERS_DDR_ParmCtl0_DEFAULT (0x3216c008) /* Parm Control */ +#define _BGP_PERS_DDR_ParmCtl1_DEFAULT (0x4168c323) /* Parm Control */ +#define _BGP_PERS_DDR_MiscCtl0_DEFAULT (0) /* Misc. Control */ +#define _BGP_PERS_DDR_MiscCtl1_DEFAULT (0) /* Misc. 
Control */ +#define _BGP_PERS_DDR_CmdBufMode0_DEFAULT (0x00400fdf) /* Command Buffer Mode */ +#define _BGP_PERS_DDR_CmdBufMode1_DEFAULT (0xffc80600) /* Command Buffer Mode */ +#define _BGP_PERS_DDR_RefrInterval0_DEFAULT (0xD1000002) /* Refresh Interval */ +#define _BGP_PERS_DDR_RefrInterval1_DEFAULT (0x04000000) /* Refresh Interval */ +#define _BGP_PERS_DDR_ODTCtl0_DEFAULT (0) /* ODT Control */ +#define _BGP_PERS_DDR_ODTCtl1_DEFAULT (0) /* ODT Control */ +#define _BGP_PERS_DDR_TimingTweaks_DEFAULT (0) /* DRAM timing tweaks to use */ +#define _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805) /* Data Strobe Calibration */ +#define _BGP_PERS_DDR_DQSCtl_DEFAULT (0x00000168) /* DQS Control */ +#define _BGP_PERS_DDR_Throttle_DEFAULT (0) /* DDR Throttle */ + +#define _BGP_PERS_DDR_CAS_DEFAULT (4) /* CAS Latency (3, 4, or 5) */ +#define _BGP_PERS_DDR_DDRSizeMB_DEFAULT (2048) /* Total DDR size in MegaBytes (512MB - 16384MB). */ +#define _BGP_PERS_DDR_Chips_DEFAULT (0x01) /* Type of DDR chips: 512GBx8 */ + +#define _BGP_PERS_DDRFLAGS_ENABLE_Scrub _BN(0) /* Enable DDR Slow Scrub when 1 */ + +/* DDRFLAGS default: Enable Slow Scrub. */ +#define _BGP_PERS_DDRFLAGS_DEFAULT (_BGP_PERS_DDRFLAGS_ENABLE_Scrub) + +#define _BGP_PERS_SRBS0_DEFAULT (0xFFFFFFFF) +#define _BGP_PERS_SRBS1_DEFAULT (0xFFFFFFFF) + +typedef struct _BGP_Personality_DDR_t + { + uint32_t DDRFlags; /* Misc. Flags and Settings */ + uint32_t SRBS0; /* Controller 0 SRBS/CK Settings */ + uint32_t SRBS1; /* Controller 1 SRBS/CK Settings */ + uint32_t PBX0; /* PBX DCRs setting (in IBM bit numbering) */ + uint32_t PBX1; /* PBX DCRs setting (in IBM bit numbering) */ + uint32_t MemConfig0; /* MemConfig */ + uint32_t MemConfig1; /* MemConfig */ + uint32_t ParmCtl0; /* Parm Control */ + uint32_t ParmCtl1; /* Parm Control */ + uint32_t MiscCtl0; /* Misc. Control */ + uint32_t MiscCtl1; /* Misc. 
Control */ + uint32_t CmdBufMode0; /* Command Buffer Mode */ + uint32_t CmdBufMode1; /* Command Buffer Mode */ + uint32_t RefrInterval0; /* Refresh Interval */ + uint32_t RefrInterval1; /* Refresh Interval */ + uint32_t ODTCtl0; /* ODT Control */ + uint32_t ODTCtl1; /* ODT Control */ + uint8_t TimingTweaks; /* DRAM timing tweak type */ + uint8_t Unused0; + uint8_t Unused1; + uint8_t Unused2; + uint32_t DataStrobeCalib1; /* Data Strobe Calibration */ + uint32_t DQSCtl; /* DQS Control */ + uint32_t Throttle; /* DDR Throttle */ + uint16_t DDRSizeMB; /* Total DDR size in MegaBytes (512MB - 16384MB). */ + uint8_t Chips; /* Type of DDR chips */ + uint8_t CAS; /* CAS Latency (3, 4, or 5) */ + } + _BGP_Personality_DDR_t; + + +typedef struct _BGP_Personality_Networks_t + { + uint32_t BlockID; /* a.k.a. PartitionID */ + + uint8_t Xnodes, + Ynodes, + Znodes, + Xcoord, + Ycoord, + Zcoord; + + /* PSet Support */ + uint16_t PSetNum; + uint32_t PSetSize; + uint32_t RankInPSet; + + uint32_t IOnodes; + uint32_t Rank; /* Rank in Block (or Partition) */ + uint32_t IOnodeRank; /* Rank (and therefore P2P Addr) of my I/O Node */ + uint16_t TreeRoutes[ 16 ]; + } + _BGP_Personality_Networks_t; + + +typedef struct _BGP_IP_Addr_t + { + /* IPv6 Addresses are 16 bytes, where the */ + /* lower 4 (indices 12-15) can be used for IPv4 address. 
*/ + uint8_t octet[ 16 ]; + } + _BGP_IP_Addr_t; + + +typedef struct _BGP_Personality_Ethernet_t + { + uint16_t MTU; /* Initial emac MTU size */ + uint8_t EmacID[6]; /* MAC address for emac */ + _BGP_IP_Addr_t IPAddress; /* IPv6/IPv4 address of this node */ + _BGP_IP_Addr_t IPNetmask; /* IPv6/IPv4 netmask */ + _BGP_IP_Addr_t IPBroadcast; /* IPv6/IPv4 broadcast address */ + _BGP_IP_Addr_t IPGateway; /* IPv6/IPv4 initial gateway (zero if none) */ + _BGP_IP_Addr_t NFSServer; /* IPv6/IPv4 NFS system software server address */ + _BGP_IP_Addr_t serviceNode; /* IPv6/IPv4 address of service node */ + + /* NFS mount info */ + char NFSExportDir[_BGP_PERSONALITY_LEN_NFSDIR]; + char NFSMountDir[ _BGP_PERSONALITY_LEN_NFSDIR]; + + /* Security Key for Service Node authentication */ + uint8_t SecurityKey[ _BGP_PERSONALITY_LEN_SECKEY ]; + } + _BGP_Personality_Ethernet_t; + + +#define BGP_PERS_BLKCFG_IPOverCollective _BN(31) +#define BGP_PERS_BLKCFG_IPOverTorus _BN(30) +#define BGP_PERS_BLKCFG_IPOverCollectiveVC _BN(29) +#define BGP_PERS_BLKCFG_CIOModeSel(x) _B2(28,x) +#define BGP_PERS_BLKCFG_bgsysFSSel(x) _B3(26,x) +#define BGP_PERS_BLKCFG_CIOMode_Full 0 +#define BGP_PERS_BLKCFG_CIOMode_MuxOnly 1 +#define BGP_PERS_BLKCFG_CIOMode_None 2 +#define BGP_PERS_BLKCFG_bgsys_NFSv3 0 +#define BGP_PERS_BLKCFG_bgsys_NFSv4 1 +#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \ + BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3)) + + +typedef struct T_BGP_Personality_t + { + uint16_t CRC; + uint8_t Version; + uint8_t PersonalitySizeWords; + + _BGP_Personality_Kernel_t Kernel_Config; + + _BGP_Personality_DDR_t DDR_Config; + + _BGP_Personality_Networks_t Network_Config; + + _BGP_Personality_Ethernet_t Ethernet_Config; + + uint8_t Block_Config; + uint8_t padd[7]; /* Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr) */ + /* to simplify jtag operations. See issue #140. 
*/ + } + _BGP_Personality_t; + +#define Network_Config_treeInfo0 DDR_Config.ODTCtl0 +#define Network_Config_treeInfo1 DDR_Config.ODTCtl1 +#define Network_Config_treeInfo2 DDR_Config.CmdBufMode0 + +/* _BGP_PersonalityTreeInfo provides information about one of the tree + * ports (A,B or C) on this node. It is a 32-bit value. + * See accessor methods below which interpret these fields with this layout: + * + * .-.-.--.--.--.------------------------. + * |V|R|LT|CW|DP| destP2Paddr | + * `-'-'--'--'--'------------------------' + * 1 1 2 2 2 24 <- bits in field + * + * V Valid bit. Use is deprecated. Was used for forward compatibility + * R Wire is redundant + * LT Link type (2 bit). 0->no wire, 1->compute node, 2->I/O node, 3->reserved + * CW CommWorld wire interpret (2 bit): 0->unused wire, 1->child, 2->parent + * DP Destination Port on other end of wire (2 bit) 0,1,2 -> A,B,C + * destP2Paddr (24 bit) Tree address of node on other end of the wire + */ + +#define _BGP_PERS_TREEINFO_VALID 0x80000000 +#define _BGP_PERS_TREEINFO_REDUNDANT 0x40000000 +#define _BGP_PERS_TREEINFO_LINKTYPE_MASK 0x30000000 +#define _BGP_PERS_TREEINFO_LINKTYPE_SHIFT 28 +#define _BGP_PERS_TREEINFO_COMMWORLD_MASK 0x0c000000 +#define _BGP_PERS_TREEINFO_COMMWORLD_SHIFT 26 +#define _BGP_PERS_TREEINFO_DESTPORT_MASK 0x03000000 +#define _BGP_PERS_TREEINFO_DESTPORT_SHIFT 24 +#define _BGP_PERS_TREEINFO_DESTP2P_MASK 0x00ffffff + +#define _BGP_PERS_TREEINFO_LINKTYPE_NOWIRE 0 +#define _BGP_PERS_TREEINFO_LINKTYPE_COMPUTE 1 +#define _BGP_PERS_TREEINFO_LINKTYPE_IO 2 + +#define _BGP_PERS_TREEINFO_COMMWORLD_UNUSED 0 /* unused wire */ +#define _BGP_PERS_TREEINFO_COMMWORLD_CHILD 1 +#define _BGP_PERS_TREEINFO_COMMWORLD_PARENT 2 + +#define _BGP_PERS_TREE_PORT_0 0 +#define _BGP_PERS_TREE_PORT_1 1 +#define _BGP_PERS_TREE_PORT_2 2 + +/* This struct is the layout on big endian architectures (ppc) */ +typedef struct { + unsigned valid:1; /* 1 -> this info is valid */ + unsigned redundant:1; /* 1 -> redundant wire */ + 
unsigned linkType:2; /* 0 -> no wire, 1 -> compute node, 2 -> I/O */ + unsigned commWorld:2; /* 1 -> child port, 2 -> parent port on comm_world tree */ + unsigned destPort:2; /* dest port 0,1,2 -> A,B,C */ + unsigned destP2Paddr:24; /* destination tree addr on this port */ +} _BGP_PersonalityTreeInfo_t; + + + +/* Define a static initializer for default configuration. (DEFAULTS FOR SIMULATION) */ +/* This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c */ +#define _BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \ + 0, /* CRC */ \ + _BGP_PERSONALITY_VERSION, /* Version */ \ + (sizeof(_BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \ + { /* _BGP_Personality_Kernel_t: */ \ + 0, /* MachineLocation */ \ + _BGP_DEFAULT_FREQ, /* FreqMHz */ \ + _BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \ + _BGP_PERS_PROCESSCONFIG_DEFAULT, /* ProcessConfig */ \ + _BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \ + _BGP_PERS_NODECONFIG_DEFAULT, /* NodeConfig */ \ + _BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \ + _BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \ + _BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \ + _BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \ + 0, /* SharedMemMB */ \ + 0, /* ClockStop0 */ \ + 0 /* ClockStop1 */ \ + }, \ + { /* _BGP_Personality_DDR_t: */ \ + _BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \ + _BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \ + _BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \ + _BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \ + _BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \ + _BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \ + _BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \ + _BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \ + _BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \ + _BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \ + _BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \ + _BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \ + _BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \ + _BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \ + 
_BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \ + _BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \ + _BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \ + _BGP_PERS_DDR_TimingTweaks_DEFAULT, /* TimingTweaks */ \ + _BGP_PERS_Unused_DEFAULT, /* Unused0 */ \ + _BGP_PERS_Unused_DEFAULT, /* Unused1 */ \ + _BGP_PERS_Unused_DEFAULT, /* Unused2 */ \ + _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \ + _BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \ + _BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \ + _BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \ + _BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \ + _BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \ + }, \ + { /* _BGP_Personality_Networks_t: */ \ + 0, /* BlockID */ \ + 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \ + 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \ + 0, /* PSetNum */ \ + 0, /* PSetSize */ \ + 0, /* RankInPSet */ \ + 0, /* IOnodes */ \ + 0, /* Rank */ \ + 0, /* IOnodeRank */ \ + { 0, } /* TreeRoutes[ 16 ] */ \ + }, \ + { /* _BGP_Personality_Ethernet_t: */ \ + 1536, /* mtu */ \ + { 0, }, /* EmacID[6] */ \ + { { 0x00,0x00,0x00,0x00, /* IPAddress */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0xFF,0xFF,0xFF,0x70 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPGateway */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* NFSServer */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* serviceNode */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + "", /* NFSExportDir[32] */ \ + "", /* NFSMountDir[32] */ \ + { 0x00, } /* SecurityKey[32] */ \ + }, \ + 0, /* Block_Config */ \ + { 0, } /* padd[7] */ \ + } + + 
+/* Define a static initializer for default configuration. (DEFAULTS FOR HARDWARE) */ +/* This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c */ +#define _BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \ + 0, /* CRC */ \ + _BGP_PERSONALITY_VERSION, /* Version */ \ + (sizeof(_BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \ + { /* _BGP_Personality_Kernel_t: */ \ + 0, /* MachineLocation */ \ + _BGP_DEFAULT_FREQ, /* FreqMHz */ \ + _BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \ + _BGP_PERS_PROCESSCONFIG_SMP, /* ProcessConfig */ \ + _BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \ + _BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE, /* NodeConfig */ \ + _BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \ + _BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \ + _BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \ + _BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \ + 0, /* SharedMemMB */ \ + 0, /* ClockStop0 */ \ + 0 /* ClockStop1 */ \ + }, \ + { /* _BGP_Personality_DDR_t: */ \ + _BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \ + _BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \ + _BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \ + _BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \ + _BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \ + _BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \ + _BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \ + _BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \ + _BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \ + _BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \ + _BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \ + _BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \ + _BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \ + _BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \ + _BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \ + _BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \ + _BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \ + _BGP_PERS_DDR_TimingTweaks_DEFAULT, /* TimingTweaks */ \ + _BGP_PERS_Unused_DEFAULT, /* Unused0 */ \ + _BGP_PERS_Unused_DEFAULT, /* Unused1 */ 
\ + _BGP_PERS_Unused_DEFAULT, /* Unused2 */ \ + _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \ + _BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \ + _BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \ + _BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \ + _BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \ + _BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \ + }, \ + { /* _BGP_Personality_Networks_t: */ \ + 0, /* BlockID */ \ + 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \ + 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \ + 0, /* PSetNum */ \ + 0, /* PSetSize */ \ + 0, /* RankInPSet */ \ + 0, /* IOnodes */ \ + 0, /* Rank */ \ + 0, /* IOnodeRank */ \ + { 0, } /* TreeRoutes[ 16 ] */ \ + }, \ + { /* _BGP_Personality_Ethernet_t: */ \ + 1536, /* mtu */ \ + { 0, }, /* EmacID[6] */ \ + { { 0x00,0x00,0x00,0x00, /* IPAddress */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0xFF,0xFF,0xFF,0x70 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* IPGateway */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* NFSServer */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + { { 0x00,0x00,0x00,0x00, /* serviceNode */ \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00, \ + 0x00,0x00,0x00,0x00 \ + } }, \ + "", /* NFSExportDir[32] */ \ + "", /* NFSMountDir[32] */ \ + { 0x00, } /* SecurityKey[32] */ \ + }, \ + 0, /* Block_Config */ \ + { 0, } /* padd[7] */ \ + } + + +__END_DECLS + + + +#endif /* Add nothing below this line. 
*/ diff --git a/arch/powerpc/include/common/namespace.h b/arch/powerpc/include/common/namespace.h new file mode 100644 index 00000000000000..a5ee88e0c836d1 --- /dev/null +++ b/arch/powerpc/include/common/namespace.h @@ -0,0 +1,47 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/** + * \file common/namespace.h + */ + +#ifndef _NAMESPACE_H_ /* Prevent multiple inclusion */ +#define _NAMESPACE_H_ + + + + +#if !defined(__ASSEMBLY__) && defined(__cplusplus) +#define __BEGIN_DECLS extern "C" { +#define __C_LINKAGE extern "C" +#else +#define __BEGIN_DECLS +#define __C_LINKAGE +#endif + + +#if !defined(__ASSEMBLY__) && defined(__cplusplus) +#define __END_DECLS } +#else +#define __END_DECLS +#endif + + + + +#endif /* Add nothing below this line */ diff --git a/arch/powerpc/include/spi/DMA_Assert.h b/arch/powerpc/include/spi/DMA_Assert.h new file mode 100644 index 00000000000000..5f21b64b1c8da8 --- /dev/null +++ b/arch/powerpc/include/spi/DMA_Assert.h @@ -0,0 +1,276 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ + +#ifndef __DMA_ASSERT_H_ /* Prevent multiple inclusion */ +#define __DMA_ASSERT_H_ + +#ifndef __LINUX_KERNEL__ + +/*! + * \file spi/DMA_Assert.h + * + * \brief DMA SPI Assert Macros + * + * Two sets of assert macros are provided: + * - Kernel Asserts + * - User-mode Asserts + * + * When DMA SPIs are used within the kernel, a special assert routine is called + * that does NOT abort. It just prints the assertion and the location and + * continues. + * + * When DMA SPIs are used within user-mode code, the normal assert routine is + * called, which prints the assertion and location and aborts. + * + * Several levels of asserts are provided, and #define variables control which + * levels are activated. The following assert macros are available: + * + * SPI_abort - Always active and always issues assert(0). + * Primarily used for unimplemented code paths. + * Not available in the kernel. + * SPI_assert - Active by default, or when ASSERT_PROD is defined. + * Meant to flag user errors. + * SPI_assert_debug - Active by default. Meant to flag coding + * errors before shipping. + * + * The following #defines control which level of asserts are compiled into + * the code. 
Only one of ASSERT_ABORT, ASSERT_PROD (or nothing) should + * be specified. + * - ASSERT_ABORT means that the "abort" level is the only level + * of asserts that is active. Other levels are turned off. + * - ASSERT_PROD means that "abort" and "assert" levels are active. + * "assert_debug" is turned off. + * - Not specifying ASSERT_ABORT or ASSER_PROD means that all + * levels of asserts ("abort", "assert", "assert_debug") are + * active. + */ + +#include <common/namespace.h> + + +__BEGIN_DECLS + + +#include <stdio.h> + +/* ============================================================ */ + +#ifdef __CNK__ + +/*! + * \brief Production-level Kernel Assert. + * + * This production level of assert will be active during normal production + * code execution. + * + * When in the kernel, just do a printf, but don't exit. + */ +#define SPI_assert(x) DMA_KernelAssert(x) + +/*! + * \brief Debug-level Kernel Assert. + * + * This debug level of assert will only be active during in-house debugging. + * + * When in the kernel, just do a printf, but don't exit. + */ +#define SPI_assert_debug(x) DMA_KernelAssert(x) + +#ifdef NDEBUG + +/*! + * \brief No Debug Kernel Assert Internal Macro + * + * This macro is used internally for when asserts are turned off via the NDEBUG + * flag. It does nothing. + */ +#define DMA_KernelAssert( __assert_test ) ((void)0) + +/* ============================================================ */ + +#else /* not NDEBUG */ + +/*! + * \brief Kernel Assert Internal Function + * + * This function is called when the kernel determines that it needs to assert. + * It prints the assertion that failed and the code location, but does not + * abort. The kernel should continue executing. 
+ * + * \param[in] Pointer to the assertion string that failed the test + * \param[in] Pointer to the name of the source file that coded the assert + * \param[in] Line number within the source file that coded the assert + */ +extern inline void __DMA_KernelAssert( const char *__assertion, + const char *__file, + int __line ) +{ + printf("Assertion Failed: %s, file %s, line %d.\n", + __assertion, + __file, + __line ); +} + + +/*! + * \brief Kernel Assert Internal Macro + * + * This macro is used internally when asserts are turned on (the NDEBUG flag + * is not specified). It tests the assertion. If the assertion is true, it + * does nothing. If the assertion is false, it invokes the __DMA_KernelAssert + * internal function to print out the assert information. + * + * \param[in] Pointer to the assertion string that failed the test + * \param[in] Pointer to the name of the source file that coded the assert + * \param[in] Line number within the source file that coded the assert + */ +#define DMA_KernelAssert( __assert_test ) \ + ((__assert_test) ? ((void)0) : \ + __DMA_KernelAssert( #__assert_test, __FILE__, __LINE__ )) + + +#endif /* NDEBUG */ + +/* ============================================================ */ + +#else /* not __CNK__ */ + +#include <assert.h> + +#ifdef ASSERT_ABORT + +/*! + * \brief Abort-level Abort Assert + * + * This macro is defined when the ASSERT_ABORT level of asserts is active. + * + * This macro will assert(0). + * + */ +#define SPI_abort() assert(0) + +/*! + * \brief Abort-level Production Assert + * + * This macro is defined when the ASSERT_ABORT level of asserts is active. + * This macro will not assert. It will simply execute the assert test, but + * because abort-level-only asserts are active, it will not assert. + * + */ +#define SPI_assert(x) + +/*! + * \brief Abort-level Debug Assert + * + * This macro is defined when the ASSERT_ABORT level of asserts is active. + * This macro will not assert. 
It will simply execute the assert test, but + * because abort-level-only asserts are active, it will not assert. + * + */ +#define SPI_assert_debug(x) + +/* ============================================================ */ + +#else /* Not ASSERT_ABORT */ + +#ifdef ASSERT_PROD + +/*! + * \brief Production-level Abort Assert + * + * This macro is defined when the ASSERT_PROD level of asserts is active. + * + * This macro will assert(0). + * + */ +#define SPI_abort() assert(0) + +/*! + * \brief Production-level Production Assert + * + * This macro is defined when the ASSERT_PROD level of asserts is active. + * + * This macro invokes the standard assert() function with the specified + * assert test. + */ +#define SPI_assert(x) assert(x) + +/*! + * \brief Production-level Debug Assert + * + * This macro is defined when the ASSERT_PROD level of asserts is active. + * + * This macro will not assert. It will simply execute the assert test, but + * because production-level-only asserts are active, it will not assert. + */ +#define SPI_assert_debug(x) + +/* ============================================================ */ + +#else /* Not ASSERT_PROD */ + +/*! + * \brief Debug-level Abort Assert + * + * This macro is defined when all levels of asserts are desired (neither the + * ASSERT_ABORT nor ASSERT_PROD level of asserts is active. This is the + * default). + * + * This macro will assert(0). + * + */ +#define SPI_abort() assert(0) + +/*! + * \brief Debug-level Production Assert + * + * This macro is defined when all levels of asserts are desired (neither the + * ASSERT_ABORT nor ASSERT_PROD level of asserts is active. This is the + * default). + * + * This macro invokes the standard assert() function with the specified + * assert test. + */ +#define SPI_assert(x) assert(x) + +/*! + * \brief Debug-level Debug Assert + * + * This macro is defined when all levels of asserts are desired (neither the + * ASSERT_ABORT nor ASSERT_PROD level of asserts is active. 
This is the + * default). + * + * This macro invokes the standard assert() function with the specified + * assert test. + */ +#define SPI_assert_debug(x) assert(x) + +#endif + +#endif + +#endif /* __CNK__ */ + + +__END_DECLS + + +#endif /* ! __LINUX_KERNEL__ */ + +#endif diff --git a/arch/powerpc/include/spi/DMA_Counter.h b/arch/powerpc/include/spi/DMA_Counter.h new file mode 100644 index 00000000000000..4a46a19ea07829 --- /dev/null +++ b/arch/powerpc/include/spi/DMA_Counter.h @@ -0,0 +1,2986 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ + +#ifndef _DMA_COUNTER_H_ /* Prevent multiple inclusion */ +#define _DMA_COUNTER_H_ + + +/*! + * \file spi/DMA_Counter.h + * + * \brief DMA SPI Counter Definitions and Inline Functions + * + * This include file contains inline functions that are used to interface with + * BG/P DMA injection and reception counters at the lowest level. 
+ * Functions include + * - set and get a counter's value and base address + * - enable and disable a counter or group of counters + * - query whether a counter or group of counters has hit zero + * - clear a counter's or group of counters' hit-zero state + * - set and get a reception counter's maximum address + * + * Definitions: + * - A counter is a 32-bit value containing the number of bytes being + * transferred from/to memory + * - Associated with a counter is a base address indicating where the data is + * being transferred from/to + * - Associated with a reception counter is a max address bounding the DMA + * transfer. + * - There are injection (iDMA) and reception (rDMA) counters + * - There are DMA_NUM_COUNTERS iDMA counters and DMA_NUM_COUNTERS rDMA + * counters + * - A counter group consists of DMA_NUM_COUNTERS_PER_GROUP counters + * - There are DMA_NUM_COUNTER_GROUPS iDMA counter groups and + * DMA_NUM_COUNTER_GROUPS rDMA counter groups + * - A subgroup consists of DMA_NUM_COUNTERS_PER_SUBGROUP counters. This is + * the unit of counter allocation. + * - The highest-level counter inlines in this include file work with virtual + * addresses. They are converted to physical addresses and placed into the + * counter. + * - The counter's base and max addresses reside in the DMA memory map (DMA + * SRAM). The DMA_CounterHw_t structure, known as the hardware counter + * structure maps a single counter in this storage. They are "shadowed" by + * these inline functions to a DMA_Counter_t structure in DDR memory, + * known as the software counter structure, and their associated virtual + * address is also stored in that structure for easy retrieval. The + * physical addresses really don't have to reside in this shadow structure, + * but it is faster to access them there than from the DMA's SRAM. + * - The counter's base and max addresses are stored in the DMA SRAM as + * 16B-aligned 4-bit shifted physical addresses. 
That is, the 36-bit + * physical address is right shifted 4 bits, aligning it on a 16B boundary + * leaving 32 bits. The following naming conventions are used to store + * addresses: + * - pa_xxxx: Physical address (32-bit, 16B-aligned 4-bit shifted) + * - va_xxxx: Virtual address (32 bits). + * + * \verbatim Picture of data structures: + + ========DDR MEMORY===================|==========DMA SRAM MEMORY============= + ------------------------------ | + | DMA_CounterGroup_t | | + | | | -------------------------------- + | status --------------------|-------|---->| DMA_CounterStatus_t | + | counter[0..63] | | -------------------------------- + | ------------------------ | | + | | DMA_Counter_t | | | ----------------------------- + | 0 | (software counter) | | | | DMA_CounterHw_t | + | | counter_hw_ptr-------|-|-------|---->| (hardware counter) | + | ------------------------ | | ----------------------------- + | . | | + | . | | + | . | | + | ------------------------ | | + | | DMA_Counter_t | | | ----------------------------- + |63 | (software counter) | | | | DMA_CounterHw_t | + | | counter_hw_ptr-------|-|-------|---->| (hardware counter) | + | ------------------------ | | ----------------------------- + | . | | + ------------------------------ | + + \endverbatim + * + * \note Memory consistency/coherency inside these inlines is achieved using + * mbar and msync. + * + * MBAR is used to make sure that all writes to memory issued by the + * calling core have been accepted by the memory system before + * continuing. This guarantees that writes and reads to/from different + * addresses to go in defined order. + * + * MBAR EXAMPLE 1: When a store is done to DMA SRAM, it may not complete + * for a period of time. If a counter value is set, and then an injection + * fifo tail pointer is set, DMA may see the tail pointer update and begin + * the operation before the counter value has been set. 
Inserting an mbar + * between the setting of the counter and the setting of the tail pointer + * guarantees that the counter will be set before the tail pointer is + * updated. + * + * MBAR EXAMPLE 2: A counter hits zero. We process the hit-zero and write + * a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero + * status (different address). The hit-zero status will still indicate + * that it hit zero, even though we have already processed it, unless an + * mbar is inserted between clearing the hit-zero and reading the hit-zero + * status. + * + * MBAR PHILOSOPHY: After DMA SRAM is updated in the DMA inline functions, + * they always do at least an mbar (possibly an msync instead...see below). + * + * MSYNC does what mbar does, plus ensures consistency across cores. That + * is, it waits for snoops (invalidations of L1 cache) on the other cores + * to complete before continuing. This guarantees that all of the cores + * will see a consistent view of memory after the msync. + * + * MSYNC EXAMPLE: When a reception counter has hit zero, we assume the + * DMA'd data is available to be read by any core. However, old copies of + * that data may still be in the L1 caches. Inserting an msync after + * detecting that a counter has hit zero guarantees that the old data has + * been removed from the L1 caches. + * + * MSYNC PHILOSOPHY: After the inline functions detect that a counter has + * hit zero, they always do an msync. + * + * SPECULATIVE EXECUTION OF MSYNC: There are cases where msync is done + * conditionally. The CPU will begin execution of both sides of the + * condition before the result of the condition has been determined. + * Then, it will cancel the execution of one side once the result of the + * condition has been determined. This speculation is unwanted when + * the first instruction on one side of the condition is msync because + * cancelling an msync is similar to executing the complete msync. 
+ * To avoid this speculative execution of msync, we call + * _bgp_msync_nonspeculative(). This will trick the CPU so it won't begin + * the msync until the result of the condition is known. + * + * CALLER ADVICE: Users of these functions should not need to do + * mbar/msync themselves, unless they are doing something like the + * following: Read a counter and operate on the result when the counter + * hasn't reached zero. The caller will need to perform an msync after + * reading the counter in order to ensure that snoops have completed + * on all CPUs before operating on the DMA'd data. + * + */ + + +#include <common/namespace.h> + + +__BEGIN_DECLS + + +/*! + * \brief __INLINE__ definition + * + * Option 1: + * Make all functions be "static inline": + * - They are inlined if the compiler can do it + * - If the compiler does not inline it, a single copy of the function is + * placed in the translation unit (eg. xxx.c)for use within that unit. + * The function is not externalized for use by another unit...we want this + * so we don't end up with multiple units exporting the same function, + * which would result in linker errors. + * + * Option 2: + * A GNU C model: Use "extern inline" in a common header (this one) and provide + * a definition in a .c file somewhere, perhaps using macros to ensure that the + * same code is used in each case. For instance, in the header file: + * + * \verbatim + #ifndef INLINE + # define INLINE extern inline + #endif + INLINE int max(int a, int b) { + return a > b ? a : b; + } + \endverbatim + * + * ...and in exactly one source file (in runtime/SPI), that is included in a + * library... + * + * \verbatim + #define INLINE + #include "header.h" + \endverbatim + * + * This allows inlining, where possible, but when not possible, only one + * instance of the function is in storage (in the library). 
+ */ +#ifndef __INLINE__ +#define __INLINE__ extern inline +#endif + + +#ifndef __LINUX_KERNEL__ + +#include <errno.h> +#include <bpcore/ppc450_inlines.h> /* For bgp_msync_nonspeculative() */ + +#endif /* ! __LINUX_KERNEL__ */ + +#include <spi/DMA_Assert.h> +#include <spi/bpcore_interface.h> /* For _BGP_IC_DMA_NFT_G3_HIER_POS*/ +#include <spi/kernel_interface.h> /* For Kernel_Virtual2Physical() */ + +/* #include <asm/bluegene.h> */ +static inline unsigned bic_hw_to_irq(unsigned group, unsigned gint) +{ + return ((group+1) << 5) | (gint & 0x1f); +} + + +/* + * ------------------------------------------------------------------------------ + * Definitions + * ------------------------------------------------------------------------------ + */ + +/*! + * \brief Number of DMA counter groups + * + * There are 4 counter groups. + * + */ +#define DMA_NUM_COUNTER_GROUPS 4 + + +/*! + * \brief Number of DMA counters in a counter group + * + * There are 64 counters in a counter group. + * + */ +#define DMA_NUM_COUNTERS_PER_GROUP 64 + + +/*! + * \brief Number of DMA counters in a counter subgroup + * + * There are 8 counters in a counter subgroup. + * + */ +#define DMA_NUM_COUNTERS_PER_SUBGROUP 8 + + +/*! + * \brief Number of DMA counter subgroups in a group + * + * There are 8 subgroups in a counter group. + * + */ +#define DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP (DMA_NUM_COUNTERS_PER_GROUP / DMA_NUM_COUNTERS_PER_SUBGROUP) + + +/*! + * \brief Number of DMA counter subgroups, in total, across all groups + * + * There are 32 subgroups in total. + * + */ +#define DMA_NUM_COUNTER_SUBGROUPS (DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP * DMA_NUM_COUNTER_GROUPS) + + +/*! + * \brief Initial value for a DMA counter + * + * This value is somewhat arbitrary, but is chosen to be different from zero, + * because zero means the counter has hit zero, and may cause false interupts. + * + */ +#define DMA_NUM_COUNTERS ( DMA_NUM_COUNTER_GROUPS * DMA_NUM_COUNTERS_PER_GROUP) + + +/*! 
+ * \brief Initial value for a DMA counter + * + * This value is somewhat arbitrary, but is chosen to be different from zero, + * because zero means the counter has hit zero, and may cause false interupts. + * + */ +#define DMA_COUNTER_INIT_VAL 0xFFFFFFFF + + +/*! + * \brief Max Number of Cores Per Node + * + * This is the maximum number of cores that can run on a compute node. + */ +#define DMA_MAX_NUM_CORES 4 + + +/*! + * \brief Returns the word number that the specified counter is in + * + * \param[in] counter_id The ID of the counter (0 to + * DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \return The number of the word that the specified counter is in (0 or 1) + * + * Used as an index in the "enabled", "enable", "disable", "hit_zero", and + * "clear_hit_zero" fields of the DMA_CounterStatus_t structure, and + * the permissions field of the DMA_CounterGroup_t structure. + * + */ +#define DMA_COUNTER_GROUP_WORD_ID(counter_id) ((counter_id)>>5) + + +/*! + * \brief Returns the bit within the word that the specified counter is in + * + * \param[in] counter_id The ID of the counter (0 to + * DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \return The bit position within the word that the specified counter is + * in (0-31) + * + * Used with the "enabled", "enable", "disable", "hit_zero", and + * "clear_hit_zero" fields of the DMA_CounterStatus_t structure, and + * the permissions" field of the DMA_CounterGroup_t structure. + * + */ +#define DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id) ((counter_id) & 0x0000001F) + + +/* + * ----------------------------------------------------------------------------- + * Structures + * ----------------------------------------------------------------------------- + */ + +/*! + * \brief Hardware DMA Counter + * + * This maps a DMA counter as it is in the DMA memory map (DMA SRAM). 
 *
 */
typedef struct DMA_CounterHw_t
{
  volatile unsigned  counter;    /*!< RW Value of the counter */
  volatile unsigned  increment;  /*!< W  Increment the counter by this value */
  volatile unsigned  pa_base;    /*!< RW Base address of the counter, 32 bit
                                      16B-aligned 4-bit shifted address */
  volatile unsigned  pa_max;     /*!< RW Maximum payload address (rDMA only),
                                      16B-aligned 4-bit shifted address */
}
DMA_CounterHw_t;


/*!
 * \brief DMA Counter Hardware Status structure
 *
 * This structure maps the DMA SRAM for a particular group of
 * DMA_NUM_COUNTERS_PER_GROUP counters.
 *
 * This is a common structure between iDMA and rDMA.
 *
 * \see DMA_COUNTER_GROUP_WORD_ID
 * \see DMA_COUNTER_GROUP_WORD_BIT_ID
 *
 */
typedef struct DMA_CounterStatus_t
{
  volatile unsigned  enabled[2];        /*!< R  bitmask (1 bit/counter):
                                             Counter is enabled (1=enabled) */
  volatile unsigned  enable[2];         /*!< W  bitmask (1 bit/counter):
                                             Counter enable: writing a 1 to
                                             bit i enables counter i.  This
                                             changes the corresponding bit
                                             in enabled. */
  volatile unsigned  disable[2];        /*!< W  bitmask (1 bit/counter):
                                             Counter disable: writing a 1 to
                                             bit i disables counter i.  This
                                             changes the corresponding bit
                                             in enabled. */
  volatile unsigned  reserved[2];       /*!< HOLE */
  volatile unsigned  hit_zero[2];       /*!< R  bitmask (1 bit/counter):
                                             Counter hit zero
                                             (1=counter hit zero) */
  volatile unsigned  clear_hit_zero[2]; /*!< W  bitmask (1 bit/counter):
                                             Clear counter hit zero: writing
                                             a 1 to bit i clears the
                                             corresponding bit in hit_zero. */
  volatile unsigned  grp_status;        /*!< R  bitmask (1 bit/subgroup):
                                             bit i is 1 if or-reduce over
                                             sub-group i of the hit_zero bits
                                             anded with the enable bits.
                                             Note this includes info about
                                             all DMA_NUM_COUNTERS counters,
                                             not just those in this group. */
}
DMA_CounterStatus_t;


/*!
 * \brief Software DMA Counter Structure
 *
 * This structure provides a shadow (recent copy) of the hardware counter's
 * base and max.
While accessing the actual hardware DMA counter's base and + * max is equivalent, it is slower than accessing them from here. + * + * Additionally, it stores the corresponding virtual addresses, for easy + * retrieval, since the hardware counter does not maintain the virtual + * address. + * + * Finally, it contains a pointer to the corresponding hardware counter in + * DMA SRAM. + * + */ +typedef struct DMA_Counter_t +{ + void *va_base; /*!< Shadow virtual address of the base */ + unsigned int pa_base; /*!< Shadow physical address of the base. + 16B-aligned 4-bit shifted address. */ + void *va_max; /*!< Shadow virtual address of the max (rDMA only) */ + unsigned int pa_max; /*!< Shadow physical address of the max (rDMA only) + 16B-aligned 4-bit shifted address. */ + DMA_CounterHw_t *counter_hw_ptr; /*!< Pointer to the hardware counter */ +} +ALIGN_L1D_CACHE DMA_Counter_t; +/*! + * \todo Re-think whether we need to align this structure on a L1 cache line boundary + * + */ + + +/*! + * \enum DMA_Type_t + * \brief DMA type (injection/reception) enum + * + */ +typedef enum DMA_Type_t +{ + DMA_Type_Injection = 0, /*!< Injection type of DMA */ + DMA_Type_Reception = 1 /*!< Reception type of DMA */ + +} +DMA_Type_t; + + +/*! + * \brief DMA Counter Group Structure + * + * This structure defines a DMA Counter Group. It is filled in by the kernel + * during the DMA_CounterGroupAllocate system call. It points to a + * DMA Counter Status structure, and contains up to DMA_NUM_COUNTERS_PER_GROUP + * software DMA Counter structures making up this group. + * + * It also contains permission bits to use the counters, one bit per counter. + * When the permission bit is on, the corresponding counter belongs to this + * group and can be used. Otherwise, the counter should not be used as part + * of this group. These permission bits are used as follows: + * 1. Inline functions will ASSERT when an attempt is made + * to use a counter that is not part of this group. + * 2. 
Inline functions will use the permission bits as a mask + * to return status information only for the counters allocated + * to this group. + * Use the DMA_COUNTER_GROUP_WORD_ID and DMA_COUNTER_GROUP_WORD_BIT_ID + * macros to locate the appropriate "permitted_counters" bit. + * + * Allocations are done in subgroups (groups of DMA_NUM_COUNTERS_PER_SUBGROUP + * counters). This structure contains a bit mask of the subgroups that belong + * to this group. + * + * \see DMA_COUNTER_GROUP_WORD_ID + * \see DMA_COUNTER_GROUP_WORD_BIT_ID + * + */ +typedef struct DMA_CounterGroup_t +{ + + DMA_CounterStatus_t *status_ptr; /*!< Pointer to counter status */ + unsigned int permissions[2]; /*!< Bit i is 1 if permitted to use + counter i, 0 otherwise. One bit + per counter, + DMA_NUM_COUNTERS_PER_GROUP + counters. */ + unsigned int grp_permissions; /*!< Bit i is 1 if permitted to use + subgroup i, 0 otherwise. One + bit per subgroup, 8 subgroups. */ + unsigned int group_id; /*!< The id of this group (0 to + DMA_NUM_COUNTER_GROUPS-1). */ + DMA_Type_t type; /*!< The type of the DMA (injection + or reception) */ + DMA_Counter_t counter[DMA_NUM_COUNTERS_PER_GROUP]; /*!< + Software Counter Structures. + i-th structure's hardware + pointer is non-NULL if + permissions[i]=1, NULL if + permissions[i]=0. */ +} +DMA_CounterGroup_t; + + +/*! + * + * \brief Counter Application Segment + * + * A segment of user-addressible memory. + * Each segment consists of a virtual address, physical address, and length + * defining a contiguous segment of storage that is accessible from the + * application. + */ +typedef struct DMA_CounterAppSegment_t +{ + unsigned int length; /*!< Length in bytes of the segment */ + uint32_t va_base; /*!< Virtual address of the segment base */ + uint32_t pa_base; /*!< Shifted physical address of the segment base */ + uint32_t va_max; /*!< Virtual address of the last byte of segment */ +} DMA_CounterAppSegment_t; + + +/*! 
+ * + * \brief Counter Application Segments + * + * An array of application segments. There are N application segments per core + * on a node. Thus there are N * (number of cores on a node) application + * segments in this array. The first group of segments in the array correspond + * to core 0. The second group, core 1, etc. + */ +extern DMA_CounterAppSegment_t *DMA_CounterAppSegmentArray; + + +/*! + * \brief Number of application segments for a core + * + * The number of application segments is the same for all cores. + */ +extern uint32_t DMA_CounterNumAppSegments; + + +/*! + * \brief The index of the last application segment accessed for a core. + */ +extern int DMA_CounterCachedAppSegmentIndex[DMA_MAX_NUM_CORES]; + + +/*! + * \brief The Minimum 4-bit Shifted Physical Address Accessible From User Mode + */ +extern uint32_t DMA_CounterMinPaAccessibleFromUserMode[DMA_MAX_NUM_CORES]; + +/*! + * + * \brief Initialize Counter Application Segments + * + * Initialize the array of application segments and the global pointer to it. + * This identifies the memory regions that the application can access. + * + * Also, initialize the minimum physical address accessible from user mode + * for each core. + * + * \retval 0 Success + * \retval errorNumber Failure + */ +int DMA_CounterInitAppSegments(void); + + +/*! + * + * \brief Get Number of Counter Application Segments + * + * \returns Number of application segments for a core. + */ +__INLINE__ uint32_t DMA_CounterGetNumAppSegments( void ) +{ + return ( DMA_CounterNumAppSegments ); +} + + +/*! + * + * \brief Get Pointer to Counter Application Segments + * + * \param[in] Core number whose application segments pointer is to be + * returned. 
+ * + * \returns Pointer to application segments + */ +__INLINE__ DMA_CounterAppSegment_t * DMA_CounterGetAppSegments( unsigned int coreNum ) +{ + SPI_assert ( coreNum >= 0 ); + SPI_assert ( coreNum <= DMA_MAX_NUM_CORES ); + + { + unsigned int index = coreNum * DMA_CounterGetNumAppSegments(); + return ( & ( DMA_CounterAppSegmentArray [ index ] ) ); + } +} + + +/*! + * + * \brief Get Virtual Addresses for the Min and Max Physical Addresses + * for User Space + * + * Based on information in the DMA_CounterAppSegments array, return the + * virtual addresses associated with the min and max physical addresses + * allowed for user space. + * + * \param[out] va_min Pointer to a pointer. Upon return, the pointer is + * set to the virtual address associated with the + * minimum physical address allowed for user space. + * \param[out] va_max Pointer to a pointer. Upon return, the pointer is + * set to the virtual address associated with the + * maximum physical address allowed for user space. + * + * If the DMA_CounterNumAppSegments array has not been initialized yet + * (it is initialized in DMA_CounterGroupAllocate()), a value of 0 for the + * min and 0xFFFFFFFF max is returned. 
+ */ +__INLINE__ void DMA_CounterGetMinMaxVa(void ** va_min, + void ** va_max) +{ + /* Determine the core we are running on so the correct application + * segments are consulted + */ + unsigned int coreNum = Kernel_PhysicalProcessorID(); + DMA_CounterAppSegment_t * appSegmentArray = DMA_CounterGetAppSegments(coreNum); + uint32_t numAppSegments = DMA_CounterGetNumAppSegments(); + + if ( appSegmentArray ) + { + uint32_t minPaBase=0xFFFFFFFF, maxPa=0; + uint32_t segmentPaBase, segmentPaMax; + uint32_t i, minIndex=0, maxIndex=0; + + for (i=0; i<numAppSegments; i++) + { + segmentPaBase = appSegmentArray[i].pa_base; + if ( segmentPaBase < minPaBase ) + { + minPaBase = segmentPaBase; + minIndex = i; + } + + segmentPaMax = appSegmentArray[i].pa_base + (appSegmentArray[i].length >> 4); + if ( segmentPaMax > maxPa ) + { + maxPa = segmentPaMax; + maxIndex = i; + } + } + + *va_min = (void*)(appSegmentArray[minIndex].va_base); + *va_max = (void*)(appSegmentArray[maxIndex].va_max); + +/* printf("coreNum=%d, va_min = 0x%08x, minIndex=%d, va_max = 0x%08x, maxIndex=%d, minPa=0x%08x maxPa=0x%08x\n",coreNum,(unsigned)*va_min, minIndex, (unsigned)*va_max, maxIndex, minPaBase, maxPa); */ +/* fflush(stdout); */ + } + else + { + *va_min = (void*)0; + *va_max = (void*)0xFFFFFFFF; + } +} + + +/*! + * \brief Convert a 32-bit virtual address to a 32-bit physical address + * + * This function is a wrapper around _bgp_Virtual2Physical(), only it combines + * its 36-bit output into a 32-bit physical address by right-shifting it 4 bits. + * Thus, the physical address returned corresponds to the input virtual address + * rounded down to the next lowest 16-byte boundary. + * + * \param[in] VA 32-bit virtual address to be converted + * \param[in] vsize Size in bytes of virtual range + * \param[out] pPA Pointer to 32-bit physical address. The output physical + * address is returned in the storage pointed to by pPA. + * + * \retval 0 Successful. 
The output physical address is in *pPA + * \retval -1 Invalid Virtual Address for this process. *pPA unmodified. + * \retval -2 The range from VA to (VA+vsize-1) is not physically + * contiguous + * \retval -3 Virtual Address is in Scratch, but no Scratch, or not enough + * Scratch, is enabled. *pPA unmodified. + * + */ +__INLINE__ int Kernel_VaTo4bitShiftedPa(void *VA, + size_t vsize, + uint32_t *pPA ) +{ + int rc; + uint32_t ua_out, pa_out; + + SPI_assert( pPA != NULL ); + + rc = Kernel_Virtual2Physical(VA, + vsize, + &ua_out, + &pa_out ); + + if ( rc == 0 ) + { + *pPA = (ua_out << 28) | (pa_out >> 4); + } + + return (rc); +} + + +/* + *------------------------------------------------------------------------------ + * + * The following are inline function wrappers around system calls that + * operate on DMA counters. + * + *------------------------------------------------------------------------------ + */ + + +/*! + * \brief Query Free DMA Counter Subgroups within a Group + * + * This function is a wrapper around a system call that returns a list of the + * free (available) subgroups within the specified group. + * + * \param[in] type Specifies whether this is an injection or + * reception counter group (DMA_Type_Injection + * or DMA_Type_Reception) + * \param[in] grp Group number being queried (0 to + * DMA_NUM_COUNTER_GROUPS-1) + * \param[out] num_subgroups Pointer to an int where the number of free + * subgroups in the specified group is returned + * \param[out] subgroups Pointer to an array of num_subgroups ints where + * the list of num_subgroups subgroups is returned. + * Each int is the subgroup number + * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). The + * caller must provide space for + * DMA_NUM_COUNTERS_PER_SUBGROUP ints, in case the + * entire counter group is free. + * + * \retval 0 Successful. num_subgroups and subgroups array set as described. + * \retval -1 Unsuccessful. errno gives the reason. 
+ * + * \note The kernel may need to synchronize with other cores performing + * allocate or free syscalls. + * + */ +__INLINE__ int DMA_CounterGroupQueryFree( + DMA_Type_t type, + int grp, + int *num_subgroups, + int *subgroups + ) +{ + return Kernel_CounterGroupQueryFree( (uint32_t)type, + grp, + (uint32_t*)num_subgroups, + (uint32_t*)subgroups); +} + + +/*! + * \brief Allocate DMA Counters From A Group + * + * This function is a wrapper around a system call that allocates DMA counters + * from the specified group. Counters may be allocated in subgroups of + * DMA_NUM_COUNTERS_PER_SUBGROUP counters. Parameters specify how interrupts, + * generated when a counter hits zero, are to be handled. A + * DMA_CounterGroup_t structure is returned for use in other inline + * functions to operate on the allocated counters. + * + * \param[in] type Specifies whether this is an injection or + * reception counter group (DMA_Type_Injection + * or DMA_Type_Reception) + * \param[in] grp Group number whose counters are being allocated + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] num_subgroups Number of subgroups to be allocated from the group + * (1 to DMA_NUM_COUNTERS_PER_SUBGROUP) + * \param[in] subgroups Pointer to an array of num_subgroups ints where + * the list of subgroups to be allocated is provided. + * Each int is the subgroup number + * (0 to num_subgroups-1). + * \param[in] target The core that will receive the interrupt when a + * counter in this allocation hits zero + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] handler A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * counter in this allocation hits zero. This + * function must be coded to take 4 uint32_t + * parameters: + * - A pointer to storage specific to this + * handler. This is the handler_parm + * specified on this allocation function. + * - Three unint32_t parameters that are not used. 
+ * If handler is NULL, hit-zero interrupts will not + * be enabled for these counters. + * \param[in] handler_parm A pointer to storage that should be passed to the + * interrupt handling function (see handler + * parameter) + * \param[in] interruptGroup A InterruptGroup_t that identifies the + * group of interrupts that the counters being + * allocated will become part of. + * \param[out] cg_ptr Pointer to a structure that is filled in upon + * successful return for use in other inline + * functions to operate on the allocated counters. + * \li counter - Array of software counter + * structures. Each element + * points to the corresponding + * hardware counter in DMA SRAM. + * Pointers are null if not + * allocated). + * Counters are initialized to + * DMA_COUNTER_INIT_VAL, + * disabled, their hit_zero bit + * is off, base and max are NULL. + * \li status_ptr - Points to status area within the + * DMA memory map. + * \li permissions - Bits set for each allocated + * counter + * \li grp_permissions - Permissions for each + * subgroup + * \li group_id - The group number + * \li type - The type of DMA (injection or + * reception) + * + * \retval 0 Successful. Counters allocated and cg_ptr structure filled in as + * described. + * \retval -1 Unsuccessful. errno gives the reason. Nothing has been + * allocated. + * + * \note The kernel may need to synchronize with other cores performing queries + * or frees. + * + */ +__INLINE__ int DMA_CounterGroupAllocate( + DMA_Type_t type, + int grp, + int num_subgroups, + int *subgroups, + int target, + Kernel_CommThreadHandler handler, + void *handler_parm, + Kernel_InterruptGroup_t interruptGroup, + DMA_CounterGroup_t *cg_ptr + ) +{ + int rc; + /* + * Initialize the Counter Application Segment array and its global pointer if + * it has not been initialized yet. 
 */
  if ( DMA_CounterAppSegmentArray == NULL )
  {
    rc = DMA_CounterInitAppSegments();
    if (rc) return(rc);
  }

  /*
   * If an interrupt handler has been specified, invoke the system call
   * to configure the kernel to invoke the handler when the hit zero
   * interrupt fires.
   */

  if (handler)
  {
    /*
     * Calculate the IRQ to be one of
     * - 0:  inj counter hit zero vector 0
     * - 1:  inj counter hit zero vector 1
     * - 2:  inj counter hit zero vector 2
     * - 3:  inj counter hit zero vector 3
     *
     * - 4:  rec counter hit zero vector 0
     * - 5:  rec counter hit zero vector 1
     * - 6:  rec counter hit zero vector 2
     * - 7:  rec counter hit zero vector 3
     * based on the counter type and the DMA group number.
     */
    unsigned irqInGroup = (type == DMA_Type_Injection) ? 0 + grp : 4 + grp;

    /*
     * Calculate an interrupt ID, which is the BIC interrupt group (3)
     * combined with the IRQ number.
     */
/*     int interruptID = Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G3_HIER_POS, */
/*                                            irqInGroup); */
    int interruptID = bic_hw_to_irq(_BGP_IC_DMA_NFT_G3_HIER_POS,
                                    irqInGroup);

    /*
     * Calculate the opcode indicating
     * - the target core for interrupt
     * - to call the specified function when the interrupt fires
     * - to disable interrupts before calling the specified function
     * - to enable interrupts after calling the specified function
     */
    int opcode = ( COMMTHRD_OPCODE_CORE0 + target ) |
                 COMMTHRD_OPCODE_CALLFUNC |
                 COMMTHRD_OPCODE_DISABLEINTONENTRY |
                 COMMTHRD_OPCODE_ENABLEINTONPOOF ;

    /*
     * Configure this interrupt with the kernel.
     */
    rc = Kernel_SetCommThreadConfig(interruptID,
                                    opcode,
                                    (uint32_t*)interruptGroup,
                                    handler,
                                    (uint32_t)handler_parm,
                                    (uint32_t)NULL,
                                    (uint32_t)NULL,
                                    (uint32_t)NULL);
    if (rc) return rc;
  }

  /*
   * Invoke the system call to allocate the counters.
   * This system call also sets up the DMA DCRs to interrupt when the
   * counters hit zero.
+ */ + rc = Kernel_CounterGroupAllocate( (uint32_t)type, + grp, + num_subgroups, + (uint32_t*)subgroups, + target, + (uint32_t) NULL, /* Handler. Not used */ + (uint32_t*)NULL, /* Handler_parm. Not used */ + (uint32_t) NULL, /* InterruptGroup. Not used */ + (uint32_t*)cg_ptr); + return rc; +} + + +/*! + * \brief Free DMA Counters From A Group + * + * This function is a wrapper around a system call that frees DMA counters + * from the specified group. Counters may be freed in subgroups of + * DMA_NUM_COUNTERS_PER_SUBGROUP counters. + * + * \param[in] grp Group number whose counters are being freed + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] num_subgroups Number of subgroups to be freed from the group + * (1-DMA_NUM_COUNTERS_PER_SUBGROUP) + * \param[in] subgroups Pointer to an array of num_subgroups ints where + * the list of subgroups to be freed is provided. + * Each int is the subgroup number + * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). + * \param[out] cg_ptr Pointer to the structure previously filled in when + * these counters were allocated. Upon successful + * return, this structure is updated to reflect the + * freed counters: + * \li counter[] - Counter structures Pointers to + * freed counters nulled. + * \li permissions - Bits cleared for each freed + * counter. + * + * \retval 0 Successful. Counters freed and cg_ptr structure updated as + * described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \note The kernel may need to synchronize with other cores performing allocates + * or queries. + */ +__INLINE__ int DMA_CounterGroupFree( + int grp, + int num_subgroups, + int *subgroups, + DMA_CounterGroup_t *cg_ptr + ) +{ + return Kernel_CounterGroupFree( grp, + num_subgroups, + (uint32_t*)subgroups, + (uint32_t*)cg_ptr); +} + + + +/*! 
+ * \brief Enable or Disable Counter Overflow and Underflow Interrupts + * + * This function is a wrapper around a system call that enables or disables + * the 4 counter overflow/underflow interrupts for all counters: + * 1. Injection counter overflow + * 2. Injection counter underflow + * 3. Reception counter overflow + * 4. Reception counter underflow + * + * \param[in] enable Specifies whether to enable or disable the interrupts + * 0 = Disable, 1 = Enable. + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + */ +__INLINE__ int DMA_CounterInterruptControl(unsigned int enable) +{ + return Kernel_ChgCounterInterruptEnables( (uint32_t)enable ); + +} + + + +/* + * ----------------------------------------------------------------------------- + * The following inline functions operate directly on the Hardware DMA Counter. + * Note that MSYNC and MBAR are not performed by these hardware functions... + * it is up to the caller to perform them. + *------------------------------------------------------------------------------ + */ + + +/*! + * \brief Set DMA Hardware Counter Value + * + * Set a DMA hardware counter's value, given a pointer to the hardware counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * \param[in] value The value to be set into the counter + * + * \return None + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. + * + */ +__INLINE__ void DMA_CounterSetValueHw( + DMA_CounterHw_t *c_hw, + unsigned int value + ) +{ + SPI_assert( c_hw != NULL ); + + c_hw->counter = value; +} + + +/*! + * \brief Set DMA Hardware Counter Base + * + * Set a DMA hardware counter's base, given a pointer to the hardware counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * \param[in] pa_base The base physical address to be associated with the + * counter. 
16B-aligned 4-bit shifted physical address. + * + * \return None + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. + * + */ +__INLINE__ void DMA_CounterSetBaseHw( + DMA_CounterHw_t *c_hw, + unsigned int pa_base + ) +{ + SPI_assert( c_hw != NULL ); + + c_hw->pa_base = pa_base; +} + + +/*! + * \brief Increment DMA Hardware Counter Value + * + * Increment a DMA hardware counter's value, given a pointer to the hardware + * counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * \param[in] incr The amount to increment the counter by + * + * \return None + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. + * + */ +__INLINE__ void DMA_CounterIncrementHw( + DMA_CounterHw_t *c_hw, + unsigned int incr + ) +{ + SPI_assert( c_hw != NULL ); + + c_hw->increment = incr; +} + + +/*! + * \brief Decrement DMA Hardware Counter Value + * + * Decrement a DMA hardware counter's value, given a pointer to the hardware + * counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * \param[in] decr The amount to decrement the counter by + * + * \return None + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. + * + * \note The counter overflow interrupt will fire as a result of this operation. + * Consider disabling this interrupt. + * + */ +__INLINE__ void DMA_CounterDecrementHw( + DMA_CounterHw_t *c_hw, + unsigned int decr + ) +{ + SPI_assert( c_hw != NULL ); + + /* Decrement the counter by incrementing with a large value, which will + * cause the counter to wrap. + */ + c_hw->increment = (0 - decr); +} + + +/*! + * \brief Set Reception DMA Hardware Counter Max + * + * Set a reception DMA hardware counter's maximum payload address, given a + * pointer to the hardware counter. 
+ * + * \param[in] c_hw Pointer to the hardware counter structure + * \param[in] pa_max The max physical address to be associated with the + * counter. 16B-aligned 4-bit shifted physical address. + * + * \return None + * + * \pre The caller has ASSERTed that (c_hw != NULL) + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. + * + */ +__INLINE__ void DMA_CounterSetMaxHw( + DMA_CounterHw_t *c_hw, + unsigned int pa_max + ) +{ + c_hw->pa_max = pa_max; +} + + +/*! + * \brief Set DMA Hardware Counter Value and Base + * + * Set a DMA hardware counter's value and base, given a pointer to the hardware + * counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * \param[in] value The value to be set into the counter + * \param[in] pa_base The base physical address to be associated with the + * counter. 16B-aligned 4-bit shifted physical address. + * + * \return None + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. + * + */ +__INLINE__ void DMA_CounterSetValueBaseHw( + DMA_CounterHw_t *c_hw, + unsigned int value, + unsigned int pa_base + ) +{ + SPI_assert( c_hw != NULL ); + + c_hw->counter = value; + c_hw->pa_base = pa_base; + +} + + +/*! + * \brief Set Reception DMA Hardware Counter Value, Base, and Max + * + * Set a reception DMA hardware counter's value, base, and max, given a pointer + * to the hardware counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * \param[in] value The value to be set into the counter + * \param[in] pa_base The base physical address to be associated with the + * counter. 16B-aligned 4-bit shifted physical address. + * \param[in] pa_max The max physical address to be associated with the + * counter. 16B-aligned 4-bit shifted physical address. + * + * \return None + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. 
+ * + */ +__INLINE__ void DMA_CounterSetValueBaseMaxHw( + DMA_CounterHw_t *c_hw, + unsigned int value, + unsigned int pa_base, + unsigned int pa_max + ) +{ + SPI_assert( c_hw != NULL ); + SPI_assert( pa_max >= pa_base); + + c_hw->counter = value; + c_hw->pa_base = pa_base; + c_hw->pa_max = pa_max; +} + + +/*! + * \brief Get DMA Hardware Counter Value + * + * Get a DMA hardware counter's value, given a pointer to the hardware counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * + * \retval value The current value of the counter + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. + * + */ +__INLINE__ unsigned int DMA_CounterGetValueHw( + const DMA_CounterHw_t *c_hw + ) +{ + SPI_assert( c_hw != NULL ); + + return( c_hw->counter ); +} + + +/*! + * \brief Get DMA Hardware Counter Base + * + * Get a DMA hardware counter's base, given a pointer to the hardware counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * + * \retval pa_base The base physical address associated with the counter. + * 16B-aligned 4-bit shifted physical address. + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. + * + */ +__INLINE__ unsigned int DMA_CounterGetBaseHw( + const DMA_CounterHw_t *c_hw + ) +{ + SPI_assert( c_hw != NULL ); + + return( c_hw->pa_base ); +} + + +/*! + * \brief Get Reception DMA Hardware Counter Max + * + * Get a reception DMA hardware counter's max payload address, given a pointer + * to the hardware counter. + * + * \param[in] c_hw Pointer to the hardware counter structure + * + * \retval pa_max The max physical address associated with the counter. + * 16B-aligned 4-bit shifted physical address. + * + * \note No MSYNC or MBAR is done in this function. It is the responsibility + * of the caller to do it. 
+ * + */ +__INLINE__ unsigned int DMA_CounterGetMaxHw( + const DMA_CounterHw_t *c_hw + ) +{ + SPI_assert( c_hw != NULL ); + + return( c_hw->pa_max ); +} + + + + +/* + * ----------------------------------------------------------------------------- + * The following inline functions operate indirectly on a hardware DMA counter + * through the Software DMA Counter structure. + *------------------------------------------------------------------------------ + */ + + + + +/*! + * \brief Set DMA Counter Value + * + * Set a DMA counter's value, given a pointer to the software DMA counter + * structure. + * + * \param[in] c_sw Pointer to the software counter structure + * \param[in] value The value to be set into the counter + * + * \return None + * + */ +__INLINE__ void DMA_CounterSetValue( + DMA_Counter_t *c_sw, + unsigned int value + ) +{ + SPI_assert( c_sw != NULL ); + + DMA_CounterSetValueHw(c_sw->counter_hw_ptr, + value); + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ + +} + + +/*! + * \brief Set DMA Counter Base Address + * + * Set a DMA counter's base address, given a pointer to the software counter + * structure. + * + * \param[in] c_sw Pointer to the software counter structure + * \param[in] va_base_in The base virtual address to be associated with the + * counter. + * + * \retval 0 Success + * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to + * the va_base_in being a bad virtual address. + * + * \post In the software counter structure, va_base and pa_base are set. + * In the hardware counter structure, pa_base is set. + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + * \note The va_base in the software counter structure is the va_base_in rounded + * down to the next lowest 16B-aligned address. 
The pa_base is the 4-bit + * shifted version of va_base. + * + */ +__INLINE__ int DMA_CounterSetBase( + DMA_Counter_t *c_sw, + void *va_base_in + ) +{ + int rc; + + SPI_assert( c_sw != NULL ); + + /* + * 16-B align the virtual address and store result in software counter + * structure + */ + c_sw->va_base = (char*)( (unsigned)va_base_in & 0xFFFFFFF0 ); + + rc = Kernel_VaTo4bitShiftedPa(c_sw->va_base, + 1, + &(c_sw->pa_base) ); + if ( rc != 0 ) + { + errno = EFAULT; + return (-1); + } + + /* + * Write physical address to the hardware counter + */ + DMA_CounterSetBaseHw(c_sw->counter_hw_ptr, + c_sw->pa_base); + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ + + return (0); + +} + + +/*! + * \brief Increment DMA Counter + * + * Increment a DMA counter's value, given a pointer to the software counter + * structure. + * + * \param[in] c_sw Pointer to the software counter structure + * \param[in] incr The amount to increment the counter by + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ void DMA_CounterIncrement( + DMA_Counter_t *c_sw, + unsigned int incr + ) +{ + SPI_assert( c_sw != NULL ); + + DMA_CounterIncrementHw(c_sw->counter_hw_ptr, + incr); + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ +} + + +/*! + * \brief Decrement DMA Counter + * + * Decrement a DMA counter's value, given a pointer to the software counter + * structure. + * + * \param[in] c_sw Pointer to the software counter structure + * \param[in] decr The amount to decrement the counter by + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. 
+ * + */ +__INLINE__ void DMA_CounterDecrement( + DMA_Counter_t *c_sw, + unsigned int decr + ) +{ + SPI_assert( c_sw != NULL ); + + DMA_CounterDecrementHw(c_sw->counter_hw_ptr, + decr); + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ +} + + +/*! + * \brief Set DMA Counter Max Address + * + * Set a DMA counter's max address, given a pointer to the software counter + * structure. + * + * \param[in] c_sw Pointer to the software counter structure + * \param[in] va_max_in The max virtual address to be associated with the + * counter. + * + * \retval 0 Success + * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to + * the va_max_in being a bad virtual address. + * + * \post In the software counter structure, va_max and pa_max are set. + * In the hardware counter structure, pa_max is set. + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + * \note The va_max in the software counter structure is the va_max_in rounded + * up to the next highest 16B-aligned address. The pa_max is the 4-bit + * shifted version of va_max. + * + */ +__INLINE__ int DMA_CounterSetMax( + DMA_Counter_t *c_sw, + void *va_max_in + ) +{ + int rc; + + SPI_assert( c_sw != NULL ); + + /* + * Round up to 16B boundary and 16-B align the virtual address and store + * result in software counter structure. + */ + c_sw->va_max = (char*) ( (unsigned)va_max_in & 0xFFFFFFF0 ); + if ( c_sw->va_max != va_max_in ) c_sw->va_max = (char*)c_sw->va_max + 0x00000010; + + /* + * Get the 16B-aligned 4-bit shifted physical address from the virtual address. 
+ */ + rc = Kernel_VaTo4bitShiftedPa(c_sw->va_max, + 1, + &(c_sw->pa_max) ); + + if ( rc != 0 ) + { + errno = EFAULT; + return (-1); + } + + /* + * Write physical address to the hardware counter + */ + DMA_CounterSetMaxHw(c_sw->counter_hw_ptr, + c_sw->pa_max); + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ + + return (0); + +} + + +/*! + * \brief Set DMA Counter Value and Base Address + * + * Set a DMA counter's value and base address, given a pointer to the software + * counter structure. + * + * \param[in] c_sw Pointer to the software counter structure + * \param[in] value The value to be set into the counter + * \param[in] va_base_in The base virtual address to be associated with the + * counter. + * + * \retval 0 Success + * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to + * the va_base_in being a bad virtual address. + * + * \post In the software counter structure, va_base and pa_base are set. + * In the hardware counter structure, pa_base and value are set. + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + * \note The va_base in the software counter structure is the va_base_in rounded + * down to the next lowest 16B-aligned address. The pa_base is the 4-bit + * shifted version of va_base. + * + */ +__INLINE__ int DMA_CounterSetValueBase( + DMA_Counter_t *c_sw, + unsigned int value, + void *va_base_in + ) +{ + int rc=0; + + SPI_assert( c_sw != NULL ); + + /* + * 16-B align the virtual address and store result in software counter + * structure + */ + c_sw->va_base = (char*) ( (unsigned)va_base_in & 0xFFFFFFF0 ); + + /* + * Get the 16B-aligned 4-bit shifted physical address from the virtual address. 
+ */ + rc = Kernel_VaTo4bitShiftedPa(c_sw->va_base, + 1, + &(c_sw->pa_base) ); + if ( rc != 0 ) + { + errno = EFAULT; + return (-1); + } + + /* + * Write the value and physical address to the hardware counter + */ + DMA_CounterSetValueBaseHw(c_sw->counter_hw_ptr, + value, + c_sw->pa_base ); + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ + + return (0); +} + + +/*! + * \brief Set DMA Counter Value, Base Address, and Max Address + * + * Set a reception DMA counter's value, base address, and max address, given a + * pointer to the software counter structure. + * + * \param[in] c_sw Pointer to the software counter structure + * \param[in] value The value to be set into the counter + * \param[in] va_base_in The base virtual address to be associated with the + * counter. + * \param[in] va_max_in The max virtual address to be associated with the + * counter. + * + * \retval 0 Success + * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to + * the va_base_in or va_max_in being a bad virtual address. + * + * \post In the software counter structure, va_base, pa_base, va_max, and pa_max + * are set. In the hardware counter structure, pa_base, pa_max, and value + * are set. + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + * \note The va_base in the software counter structure is the va_base_in rounded + * down to the next lowest 16B-aligned address. The pa_base is the 4-bit + * shifted version of va_base. + * + * \note The va_max in the software counter structure is the va_max_in rounded + * up to the next highest 16B-aligned address. The pa_max is the 4-bit + * shifted version of va_max. 
+ * + */ +__INLINE__ int DMA_CounterSetValueBaseMax( + DMA_Counter_t *c_sw, + unsigned int value, + void *va_base_in, + void *va_max_in + ) +{ + int rc=0; + void *va_base, *va_max; + unsigned int pa_base, pa_max; + + SPI_assert( c_sw != NULL ); + + /* + * Process the base address: + * - 16-B align the virtual address and store result in software counter + * structure + * - Get the 16B-aligned 4-bit shifted physical address from the virtual + * address. + */ + va_base = c_sw->va_base = (char*) ( (unsigned)va_base_in & 0xFFFFFFF0 ); + + rc = Kernel_VaTo4bitShiftedPa(va_base, + 1, + &pa_base ); + if ( rc != 0 ) + { + errno = EFAULT; + return (-1); + } + + c_sw->pa_base = pa_base; + + /* + * Process the max address: + * - 16B align the virtual address and store result in software counter structure. + * Note: we can't round up or the address may be one byte out of range. + * - Get the 16B-aligned 4-bit shifted physical address from the virtual + * address. + */ + va_max = (char*) ( (unsigned)va_max_in & 0xFFFFFFF0 ); + + rc = Kernel_VaTo4bitShiftedPa(va_max, + 1, + &pa_max ); +/* printf("SetValueBaseMax: va_max_in=0x%08x, va_max=0x%08x, pa_max=0x%08x, rc=%d\n",(unsigned)va_max_in, (unsigned)va_max,pa_max,rc); */ +/* fflush(stdout); */ + if ( rc != 0 ) + { + errno = EFAULT; + return (-1); + } + + c_sw->pa_max = pa_max; + + /* + * Write the value, base, and max to the hardware counter + */ + DMA_CounterSetValueBaseMaxHw(c_sw->counter_hw_ptr, + value, + pa_base, + pa_max); + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ + + return (0); +} + + +/*! + * \brief Get DMA Counter Value + * + * Get a DMA counter's value, given a pointer to the software counter + * structure. 
+ * + * \param[in] c_sw Pointer to the software counter structure + * + * \retval value The value of the specified counter + * + * \note This function does an MSYNC after fetching the counter's value + * to ensure that the data that was just DMA'd is available to all + * cores. + * + */ +__INLINE__ unsigned int DMA_CounterGetValue( + const DMA_Counter_t *c_sw + ) +{ + unsigned int val; + + SPI_assert( c_sw != NULL ); + + val = DMA_CounterGetValueHw( c_sw->counter_hw_ptr ); + + _bgp_msync(); + + return val; + +} + + +/*! + * \brief Get DMA Counter Value with No Msync + * + * Get a DMA counter's value, given a pointer to the software counter + * structure. No Msync is done. It is up to the caller to do it, + * if necessary. + * + * \param[in] c_sw Pointer to the software counter structure + * + * \retval value The value of the specified counter + * + */ +__INLINE__ unsigned int DMA_CounterGetValueNoMsync( + const DMA_Counter_t *c_sw + ) +{ + unsigned int val; + + SPI_assert( c_sw != NULL ); + + val = DMA_CounterGetValueHw( c_sw->counter_hw_ptr ); + + return val; + +} + + +/*! + * \brief Get DMA Base Address + * + * Get a DMA counter's base virtual address, given a pointer to the software + * counter structure. + * + * \param[in] c_sw Pointer to the software counter structure + * + * \retval va_base The base virtual address associated with the specified + * counter + * + * \note This returns the shadow va_base directly out of the software counter + * structure. This should correspond with the physical address in the + * hardware counter, but it is a rounded-down-to-the-previous-16B-boundary + * version of the actual base virtual address of the buffer the caller is + * working with. + * + */ +__INLINE__ void * DMA_CounterGetBase( + const DMA_Counter_t *c_sw + ) +{ + SPI_assert( c_sw != NULL ); + + return( c_sw->va_base ); +} + + +/*! 
+ * \brief Get Reception DMA Max Address + * + * Get a reception DMA counter's max virtual address, given a pointer to + * the software counter structure. + * + * \param[in] c_sw Pointer to the software counter structure + * + * \retval va_max The max virtual address associated with the specified + * counter + * + * \note This returns the shadow va_max directly out of the software counter + * structure. This should correspond with the physical address in the + * hardware counter, but it is a rounded-up-to-the-next-16B-boundary + * version of the actual max virtual address of the buffer the caller is + * working with. + * + */ +__INLINE__ void *DMA_CounterGetMax( + const DMA_Counter_t *c_sw + ) +{ + SPI_assert( c_sw != NULL ); + + return( c_sw->va_max ); +} + + +/*! + * \brief Get Offset from DMA Base Address + * + * Given a virtual address, get the offset from the base address associated with + * a counter. + * + * \param[in] c_sw Pointer to the software counter structure + * \param[in] va Virtual address whose offset from the counter's base is + * to be returned. + * \param[in] length The number of bytes in the buffer pointed to by va. + * \param[in] coreNum The number of the core in which the virtual + * address resides (0 to DMA_MAX_NUM_CORES). + * + * \retval offset The offset of the va from the counter's base. + * + * \note This uses the counter's physical base address and the application's + * memory segments (see DMA_CounterAppSegment_t). + * + * \note It is assumed that if the coreNum is not our core, then the counter's + * base address (used in calculating the offset) is the smallest physical + * address accessible from user space on coreNum + * (DMA_CounterMinPaAccessibleFromUserMode[coreNum]). 
+ * + */ +__INLINE__ unsigned int DMA_CounterGetOffsetFromBase( + const DMA_Counter_t *c_sw, + void *va, + unsigned int length, + unsigned int coreNum + ) +{ + SPI_assert( c_sw != NULL ); + SPI_assert( va != NULL ); + SPI_assert ( coreNum >= 0 ); + SPI_assert ( coreNum <= DMA_MAX_NUM_CORES ); + { + DMA_CounterAppSegment_t *appSegmentArray = DMA_CounterGetAppSegments( coreNum ); + uint32_t numAppSegments; + uint32_t i; + uint32_t segmentVaBase; + uint32_t offset; + uint32_t ourCoreNum = Kernel_PhysicalProcessorID(); + uint32_t counterPaBase; + + + /* Determine which application segment the virtual address is in. */ + /* First, check the last app segment accessed. */ + i = DMA_CounterCachedAppSegmentIndex[coreNum]; + segmentVaBase = appSegmentArray[i].va_base; + if ( ! ( ((uint32_t)va >= segmentVaBase) && + ((uint32_t)va - segmentVaBase < appSegmentArray[i].length) ) ) + { + /* It is not the last app segment accessed. Search them. */ + numAppSegments = DMA_CounterGetNumAppSegments(); + for (i=0; i<numAppSegments; i++) + { + segmentVaBase = appSegmentArray[i].va_base; + if ( ((uint32_t)va >= segmentVaBase) && + ((uint32_t)va - segmentVaBase < appSegmentArray[i].length) ) + break; + } + SPI_assert(i < numAppSegments); + DMA_CounterCachedAppSegmentIndex[coreNum] = i; + } + + /* + * Make sure buffer fits in app segment. + */ + if ( ( (uint32_t)va + length - 1 ) > appSegmentArray[i].va_max ) + { + printf("DMA_CounterGetOffsetFromBase: Buffer 0x%08x of length %d is out of bounds. Check length.\n", + (unsigned)va, length); + SPI_assert(0); + } + + /* + * If coreNum is our core, use the offset from our core's counter base to + * calculate the DMA offset. + * Otherwise, assume the counter base is the smallest physical address + * accessible from user space on coreNum and use that. 
+ */ + if ( ourCoreNum == coreNum ) + counterPaBase = c_sw->pa_base; + else + counterPaBase = DMA_CounterMinPaAccessibleFromUserMode[coreNum]; + + /* + * If the base physical address of the application segment found above is + * greater than or equal to the counter's base physical address (typical + * case), proceed with the calculation based on that. + * + * Otherwise, use a slightly different calculation (see else leg). + */ + if ( appSegmentArray[i].pa_base >= counterPaBase ) + { + /* + * Calculate the offset from the counter base: + * - offset from app segment's virtual address base (va - segmentVaBase) + + * - segment's physical base (shifted) - counter's base (shifted) * 16 + */ + offset = + ((unsigned)va - segmentVaBase) + + ( (appSegmentArray[i].pa_base - counterPaBase) << 4 ); + +/* printf("GetOffsetFromBase: va=0x%08x, length=%d, offset=0x%08x, index=%d, segmentVaBase=0x%08x, appSegmentArrayPaBase=0x%08x, counterBase=0x%08x\n",(unsigned)va, length, offset, i, */ +/* segmentVaBase, appSegmentArray[i].pa_base, counterPaBase); */ +/* fflush(stdout); */ + } + /* + * Handle the case where the counter's base exceeds the app segment's base. + * This occurs when the counter's base is set to the base of the buffer + * rather than the min base of all the app segments. + */ + else + { + offset = + ((unsigned)va - segmentVaBase) - + ( (counterPaBase - appSegmentArray[i].pa_base) << 4 ); + +/* printf("GetOffsetFromBase2: va=0x%08x, length=%d, offset=0x%08x, index=%d, segmentVaBase=0x%08x, appSegmentArrayPaBase=0x%08x, counterBase=0x%08x\n",(unsigned)va, length, offset, i, */ +/* segmentVaBase, appSegmentArray[i].pa_base, counterPaBase); */ +/* fflush(stdout); */ + } + + return ( offset ); + } +} + + + + +/* + * ------------------------------------------------------------------------------ + * + * The following functions access counters by specifying the group pointer and + * counter_id. 
+ * + * ------------------------------------------------------------------------------ + */ + + + + +/*! + * \brief Set DMA Counter Value using a Counter ID + * + * Set a DMA counter's value, given a counter group structure and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when + * the counter was allocated + * \param[in] counter_id Identifier of the counter being set + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * \param[in] value The value to be set into the counter + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ void DMA_CounterSetValueById( + DMA_CounterGroup_t *cg_ptr, + int counter_id, + unsigned int value + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + DMA_CounterSetValue( &cg_ptr->counter[counter_id], + value ); + + /* Note: it is assumed that the above function call performs an MBAR */ +} + + +/*! + * \brief Set DMA Counter Base Address using a Counter ID + * + * Set a DMA counter's base address, given a counter group structure and + * counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when + * the counter was allocated. + * \param[in] counter_id Identifier of the counter being set + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * \param[in] va_base_in The base virtual address to be associated with the + * counter. + * + * \retval 0 Success + * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to + * the va_base_in being a bad virtual address. 
+ * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ int DMA_CounterSetBaseById( + DMA_CounterGroup_t *cg_ptr, + int counter_id, + void *va_base_in + ) +{ + int rc; + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + rc = DMA_CounterSetBase( &cg_ptr->counter[counter_id], + va_base_in ); + + /* Note: it is assumed that the above function call performs an MBAR */ + + return rc; +} + + +/*! + * \brief Increment DMA Counter using a Counter ID + * + * Increment a DMA counter's value, given a counter group structure and + * counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when + * the counter was allocated. + * \param[in] counter_id Identifier of the counter being incremented + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * \param[in] incr The amount to increment the counter by + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ void DMA_CounterIncrementById( + DMA_CounterGroup_t *cg_ptr, + int counter_id, + unsigned int incr + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + DMA_CounterIncrement( &cg_ptr->counter[counter_id], + incr ); + + /* Note: it is assumed that the above function call performs an MBAR */ +} + + +/*! 
+ * \brief Decrement DMA Counter using a Counter ID + * + * Decrement a DMA counter's value, given a counter group structure and + * counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when + * the counter was allocated. + * \param[in] counter_id Identifier of the counter being decremented + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * \param[in] decr The amount to decrement the counter by + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ void DMA_CounterDecrementById( + DMA_CounterGroup_t *cg_ptr, + int counter_id, + unsigned int decr + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + DMA_CounterDecrement( &cg_ptr->counter[counter_id], + decr ); + + /* Note: it is assumed that the above function call performs an MBAR */ +} + + +/*! + * \brief Set Reception DMA Counter Max Address using a Counter ID + * + * Set a reception DMA counter's base address, given a counter group structure + * and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when + * the counter was allocated. + * \param[in] counter_id Identifier of the counter being set + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * \param[in] va_max_in The max virtual address to be associated with the + * counter. + * + * \retval 0 Success + * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to + * the va_max_in being a bad virtual address. + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. 
+ * + */ +__INLINE__ int DMA_CounterSetMaxById( + DMA_CounterGroup_t *cg_ptr, + int counter_id, + void *va_max_in + ) +{ + int rc; + + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + rc = DMA_CounterSetMax( &cg_ptr->counter[counter_id], + va_max_in ); + + /* Note: it is assumed that the above function call performs an MBAR */ + + return rc; + +} + + +/*! + * \brief Get DMA Counter Value using a Counter ID + * + * Get a DMA counter's value, given a counter group structure and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when + * the counter was allocated. + * \param[in] counter_id Identifier of the counter + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \retval value The value of the counter + * + */ +__INLINE__ unsigned int DMA_CounterGetValueById( + const DMA_CounterGroup_t *cg_ptr, + const int counter_id + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + return ( DMA_CounterGetValue( &cg_ptr->counter[counter_id] ) ); +} + + +/*! + * \brief Get DMA Counter Base Address using a Counter ID + * + * Get a DMA counter's base virtual address, given a counter group structure and + * counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] counter_id Identifier of the counter + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \retval va_base The base virtual address associated with the specified + * counter + * + * \note This returns the shadow va_base directly out of the software counter + * structure. 
This should correspond with the physical address in the + * hardware counter, but it is a rounded-down-to-the-previous-16B-boundary + * version of the actual base virtual address of the buffer the caller is + * working with. + * + */ +__INLINE__ void * DMA_CounterGetBaseById( + const DMA_CounterGroup_t *cg_ptr, + int counter_id + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + return( DMA_CounterGetBase( &cg_ptr->counter[counter_id] ) ); +} + + +/*! + * \brief Get Offset from DMA Base Address using a Counter ID + * + * Given a virtual address, get the offset from the base address associated with + * the specified counter. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when + * the counter was allocated + * \param[in] counter_id Identifier of the counter + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * \param[in] va Virtual address whose offset from the counter's base + * is to be returned. + * \param[in] length The number of bytes in the buffer pointed to by va. + * \param[in] coreNum The number of the core in which the virtual + * address resides (0 to DMA_MAX_NUM_CORES). + * + * \retval offset The offset of the va from the counter's base. + * + * \note This works with the shadow va_base directly out of the software counter + * structure. This should correspond with the physical address in the + * hardware counter, but it is a rounded-down-to-the-previous-16B-boundary + * version of the actual base virtual address of the buffer the caller is + * working with. + * + * \note No effort is given to flag the case where va is less than the base + * address. In that case, (va - va_base) is returned, whatever that is. 
+ * + */ +__INLINE__ unsigned int DMA_CounterGetOffsetFromBaseById( + const DMA_CounterGroup_t *cg_ptr, + int counter_id, + void *va, + unsigned int length, + unsigned int coreNum + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); +/* printf("Getting offset from counter %d for core %d\n",counter_id,coreNum); */ + return( DMA_CounterGetOffsetFromBase( &cg_ptr->counter[counter_id], + va, + length, + coreNum ) ); +} + + +/*! + * \brief Get Reception DMA Counter Max Address Using a Counter ID + * + * Get a reception DMA counter's maximum virtual address, given a counter group + * structure and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] counter_id Identifier of the counter + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \retval va_max The virtual address of the max of the counter + * + * \note This returns the shadow va_max directly out of the software counter + * structure. This should correspond with the physical address in the + * hardware counter, but it is a rounded-up-to-the-next-16B-boundary + * version of the actual max virtual address of the buffer the caller is + * working with. + * + */ +__INLINE__ void * DMA_CounterGetMaxById( + const DMA_CounterGroup_t *cg_ptr, + const int counter_id + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + return ( DMA_CounterGetMax( &cg_ptr->counter[counter_id] ) ); +} + + +/*! 
+ * \brief Set DMA Counter Value and Base Address using a Counter ID + * + * Set a DMA counter's value and base address, given a counter group structure + * and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] counter_id Identifier of the counter being set + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * \param[in] value The value to be set into the counter + * \param[in] va_base_in The base virtual address to be associated with the + * counter. + * + * \retval 0 Success + * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to + * the va_base_in being a bad virtual address. + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ int DMA_CounterSetValueBaseById( + DMA_CounterGroup_t *cg_ptr, + int counter_id, + unsigned int value, + void *va_base_in + ) +{ + int rc; + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + rc = DMA_CounterSetValueBase( &cg_ptr->counter[counter_id], + value, + va_base_in ); + + /* Note: it is assumed that the above function call performs an MBAR */ + + return rc; +} + + + + +/*! + * \brief Set Reception DMA Counter Value, Base Address, and Max Address using + * a Counter ID + * + * Set a reception DMA counter's value, base address, and max address, given a + * counter group structure and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. 
+ * \param[in] counter_id Identifier of the counter being set + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * \param[in] value The value to be set into the counter + * \param[in] va_base_in The base virtual address to be associated with the + * counter. + * \param[in] va_max_in The max virtual address to be associated with the + * counter. + * + * \retval 0 Success + * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to + * the va_base_in or va_max_in being a bad virtual address. + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ int DMA_CounterSetValueBaseMaxById( + DMA_CounterGroup_t *cg_ptr, + int counter_id, + unsigned int value, + void *va_base_in, + void *va_max_in + ) +{ + int rc; + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + rc = DMA_CounterSetValueBaseMax( &cg_ptr->counter[counter_id], + value, + va_base_in, + va_max_in ); + + /* Note: it is assumed that the above function call performs an MBAR */ + + return rc; + +} + + +/*! + * \brief Enable DMA Counter using a Counter ID + * + * Enable a DMA counter, given a counter group structure and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] counter_id Identifier of the counter being enabled + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. 
+ * + */ +__INLINE__ void DMA_CounterSetEnableById( + DMA_CounterGroup_t *cg_ptr, + int counter_id + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + SPI_assert( cg_ptr->status_ptr != 0); + + { + /* Enable the counter by writing 1 to the appropriate bit */ + int r = DMA_COUNTER_GROUP_WORD_ID(counter_id); + int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id); + cg_ptr->status_ptr->enable[r] = _BN(c); + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ + } +} + + +/*! + * \brief Disable DMA Counter using a Counter ID + * + * Disable a DMA counter, given a counter group structure and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] counter_id Identifier of the counter being disabled + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. 
+ * + */ +__INLINE__ void DMA_CounterSetDisableById( + DMA_CounterGroup_t *cg_ptr, + int counter_id + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + SPI_assert( cg_ptr->status_ptr != 0); + + { + /* Disable the counter by writing 1 to the appropriate bit */ + int r = DMA_COUNTER_GROUP_WORD_ID(counter_id); + int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id); + cg_ptr->status_ptr->disable[r] = _BN(c); + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ + } +} + + +/*! + * \brief Determine Whether a DMA Counter is Enabled using a Counter ID + * + * Determine whether a DMA counter is enabled, given a counter group structure + * and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] counter_id Identifier of the counter being queried + * (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \retval 0 The counter is disabled + * \retval 1 The counter is enabled + * + */ +__INLINE__ int DMA_CounterGetEnabledById( + const DMA_CounterGroup_t *cg_ptr, + int counter_id + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + SPI_assert( cg_ptr->status_ptr != 0); + + { + /* Return 0 or 1 if counter is disabled/enabled */ + int r = DMA_COUNTER_GROUP_WORD_ID(counter_id); + int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id); + if ( ( cg_ptr->status_ptr->enabled[r] & _BN(c) ) == 0 ) {return 0;} + else { return 1;} + } +} + + +/*! 
+ * \brief Determine Whether a DMA Counter Has Hit Zero using a Counter ID
+ *
+ * Determine whether a DMA counter has hit zero, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter being queried
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval 0 The counter has not hit zero
+ * \retval 1 The counter has hit zero
+ *
+ * \note This function does an MSYNC after determining that the counter has hit
+ * zero to ensure that the data that was just DMA'd is available to all
+ * cores. The msync is only done if this is a reception counter group,
+ * since there is nothing to sync for injection counters that have hit zero.
+ *
+ */
+__INLINE__ int DMA_CounterGetHitZeroById(
+ const DMA_CounterGroup_t *cg_ptr,
+ int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ {
+ /* Return 0 or 1 if counter has hit zero */
+ int r = DMA_COUNTER_GROUP_WORD_ID(counter_id);
+ int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+ if ( ( cg_ptr->status_ptr->hit_zero[r] & _BN(c) ) == 0 ) {return 0;}
+ else {
+ /* By convention, we assume that if counter has hit zero, then it will be
+ * used. This requires an msync to ensure snoops from the DMA arbiter
+ * have hit the cores. That is, the data that was just DMA'd is available
+ * to all cores.
+ *
+ * Furthermore, if we just put a _bgp_msync() here, it could get
+ * speculatively executed and withdrawn even if the counter hasn't hit zero,
+ * so we call a special version of this function that ensures the speculation
+ * does not occur
+ * + * It only needs to be done for reception counters since there is nothing + * to sync when sending data. + */ + if ( cg_ptr->type == DMA_Type_Reception ) _bgp_msync_nonspeculative(); + return 1; + } + } +} + + +/*! + * \brief Clear a DMA Counter's Hit Zero Status using a Counter ID + * + * Clear a DMA counter's "hit zero" status, given a counter group structure + * and counter ID. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] counter_id Identifier of the counter whose "hit zero" status is + * being cleared (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ void DMA_CounterClearHitZeroById( + DMA_CounterGroup_t *cg_ptr, + int counter_id + ) +{ + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + SPI_assert( cg_ptr->status_ptr != 0); + + { + /* Clear the hit zero bit of a counter */ + int r = DMA_COUNTER_GROUP_WORD_ID(counter_id); + int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id); + cg_ptr->status_ptr->clear_hit_zero[r] = _BN(c) ; + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ + } +} + + +/* + * ------------------------------------------------------------------------------ + * + * The following functions manipulate or get the status of multiple counters + * + * ------------------------------------------------------------------------------ + */ + + +/*! + * \brief Enable Multiple DMA Counters + * + * Enable multiple DMA counters, given a counter group structure and mask. 
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] reg Identifies the "word" (0 or 1) of the counters
+ * being manipulated. This is the index into the
+ * enable array.
+ * \param[in] counterBits Identifies which counters in the "word" are being
+ * manipulated.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetEnable(
+ DMA_CounterGroup_t *cg_ptr,
+ int reg,
+ unsigned int counterBits
+ )
+{
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) );
+ SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ cg_ptr->status_ptr->enable[reg] = counterBits;
+
+ _bgp_mbar(); /* Make sure these writes have been accepted by the memory */
+ /* system before continuing */
+}
+
+
+/*!
+ * \brief Disable Multiple DMA Counters
+ *
+ * Disable multiple DMA counters, given a counter group structure and mask.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] reg Identifies the "word" (0 or 1) of the counters
+ * being manipulated. This is the index into the
+ * disable array.
+ * \param[in] counterBits Identifies which counters in the "word" are being
+ * manipulated.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur
+ * + */ +__INLINE__ void DMA_CounterSetDisable(DMA_CounterGroup_t *cg_ptr, + int reg, + unsigned int counterBits + ) +{ + SPI_assert( cg_ptr != NULL ); + SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) ); + SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) ); + SPI_assert( cg_ptr->status_ptr != 0); + + cg_ptr->status_ptr->disable[reg] = counterBits; + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ +} + + +/*! + * \brief Get Enabled DMA Counters + * + * Get the enabled status of DMA counters, given a counter group structure + * and "word". + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] reg Identifies the "word" (0 or 1) of the counters + * being queried. This is the index into the + * enabled array. + * + * \return 32 bit mask indicating which counters in the specified word are enabled. + * Only the counters that the caller has allocated will have their status + * returned. The status for other counters will be 0. + * + */ +__INLINE__ unsigned int DMA_CounterGetEnabled( + const DMA_CounterGroup_t *cg_ptr, + int reg + ) +{ + SPI_assert( ( ( cg_ptr != NULL ) && + ( ( reg == 0 ) || ( reg == 1 ) ) ) ); + SPI_assert( cg_ptr->status_ptr != 0); + + return (cg_ptr->permissions[reg] & cg_ptr->status_ptr->enabled[reg]); +} + + +/*! + * \brief Get Hit Zero Status of DMA Counters + * + * Get the "hit zero" status of DMA counters, given a counter group structure + * and "word". + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] reg Identifies the "word" (0 or 1) of the counters + * being queried. This is the index into the + * hit zero array. + * + * \return 32 bit mask indicating which counters in the specified word hit zero. + * Only the counters that the caller has allocated will have their status + * returned. The status for other counters will be 0. 
+ * + * \note This function does an MSYNC after determining that the counter has hit + * zero to ensure that the data that was just DMA'd is available to all + * cores. The msync is only done if this is a reception counter group, + * since there is nothing to sync for injection counters that have hit zero. + * + */ +__INLINE__ unsigned int DMA_CounterGetHitZero( + const DMA_CounterGroup_t *cg_ptr, + int reg + ) +{ + unsigned int x; + + SPI_assert( ( ( cg_ptr != NULL ) && + ( ( reg == 0 ) || ( reg == 1 ) ) ) ); + SPI_assert( cg_ptr->status_ptr != 0); + + x = cg_ptr->status_ptr->hit_zero[reg]; + + if ( x != 0 ) { + + x &= cg_ptr->permissions[reg]; + + if ( ( cg_ptr->type == DMA_Type_Reception ) && + ( x != 0 ) ) + _bgp_msync_nonspeculative(); + + } + + return (x); +} + + +/*! + * \brief Get Hit Zero Status of All DMA Counters In the Specified Group + * + * Get the "hit zero" status of all DMA counters in the group specified by the + * counter group structure. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when + * the counter was allocated. + * \param[in,out] word0 Pointer to the first status word, for the first 32 + * counters. + * \param[in,out] word1 Pointer to the second status word, for the second 32 + * counters. + * + * \return word0 and word1 are set to the status of the counters. + * Only the counters that the caller has allocated will have their + * status returned. The status for other counters will be 0. + * + * \note This function does an MSYNC after determining that at least 1 counter + * has hit zero to ensure that the data that was just DMA'd is available + * to all cores. The msync is only done if this is a reception counter + * group, since there is nothing to sync for injection counters that have + * hit zero. 
+ * + */ +__INLINE__ void DMA_CounterGetAllHitZero( + const DMA_CounterGroup_t *cg_ptr, + unsigned int *word0, + unsigned int *word1 + ) +{ + unsigned int x,y; + + SPI_assert( ( cg_ptr != NULL ) && + ( word0 != NULL ) && + ( word1 != NULL ) ); + SPI_assert( cg_ptr->status_ptr != 0 ); + + x = cg_ptr->status_ptr->hit_zero[0]; + y = cg_ptr->status_ptr->hit_zero[1]; + + if ( (x | y) != 0 ) { + x &= cg_ptr->permissions[0]; + y &= cg_ptr->permissions[1]; + + if ( ( cg_ptr->type == DMA_Type_Reception ) && + ( (x | y) != 0 ) ) + _bgp_msync_nonspeculative(); + } + + *word0 = x; + *word1 = y; + + return; +} + + +/*! + * \brief Clear Hit Zero Status of DMA Counters + * + * Clear the "hit zero" status of DMA counters, given a counter group structure, + * a "word", and a mask of counters. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counter was allocated. + * \param[in] reg Identifies the "word" (0 or 1) of the counters + * being manipulated. This is the index into the + * clear_hit_zero array. + * \param[in] counterBits Identifies which counters in the "word" are being + * manipulated. + * + * \return None + * + * \note This function does an MBAR after setting the counter to ensure the + * writes have been accepted by the memory system before allowing other + * memory accesses to to occur. + * + */ +__INLINE__ void DMA_CounterGroupClearHitZero( + DMA_CounterGroup_t *cg_ptr, + int reg, + unsigned int counterBits + ) +{ + SPI_assert( cg_ptr != NULL ); + SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) ); + SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) ); + SPI_assert( cg_ptr->status_ptr != 0); + + cg_ptr->status_ptr->clear_hit_zero[reg] = counterBits; + + _bgp_mbar(); /* Make sure these writes have been accepted by the memory */ + /* system before continuing */ +} + + +/*! + * \brief Get DMA Counter Group Status + * + * Get the DMA Counter Group Status, given a counter group structure. 
+ * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counters were allocated. + * + * \return 32 bit mask indicating which subgroups have counters that are enabled and + * have hit zero. Only the subgroups that the caller has allocated will have + * their status returned. The status for other subgroups will be 0. + * + * \note This function does an MSYNC after determining that the counter has hit + * zero to ensure that the data that was just DMA'd is available to all + * cores. The msync is only done if this is a reception counter group, + * since there is nothing to sync for injection counters that have hit zero. + * + */ +__INLINE__ unsigned int DMA_CounterGetGroupStatus( + const DMA_CounterGroup_t *cg_ptr + ) +{ + unsigned int x; + + SPI_assert( cg_ptr != NULL ); + SPI_assert( cg_ptr->status_ptr != 0); + + x = cg_ptr->status_ptr->grp_status; + + if ( x != 0 ) { + + x &= cg_ptr->grp_permissions; + + if ( ( cg_ptr->type == DMA_Type_Reception ) && + ( x != 0 ) ) + _bgp_msync_nonspeculative(); + + } + + return x; +} + + +/*! + * \brief Get DMA Counter Group Number + * + * Get the DMA Counter Group number, given a counter group structure. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counters were allocated. + * + * \return The DMA Counter Group number + * + */ +__INLINE__ int DMA_CounterGetGroupNum( + const DMA_CounterGroup_t *cg_ptr + ) +{ + SPI_assert( cg_ptr != NULL ); + + return cg_ptr->group_id; +} + + +/*! + * \brief Get DMA Counter Global Id + * + * Get the global Id of a DMA Counter, given a counter group structure and a counter Id. + * + * \param[in] cg_ptr Pointer to the structure previously filled in when the + * counters were allocated. 
+ * \param[in] counter_id Identifier of the counter + * + * \return The DMA Counter Global Id (0 to DMA_NUM_COUNTERS-1) + * + */ +__INLINE__ int DMA_CounterGetGlobalId( + const DMA_CounterGroup_t *cg_ptr, + int counter_id + ) +{ + SPI_assert( ( cg_ptr != NULL ) && + ( counter_id >= 0 ) && + ( counter_id < DMA_NUM_COUNTERS_PER_GROUP ) ); + + return( counter_id + (DMA_NUM_COUNTERS_PER_GROUP * cg_ptr->group_id) ); +} + + +/*! + * \brief Get DMA Counter Local Id + * + * Get the local Id of a DMA Counter, given a counter group structure and a Global + * counter Id. + * + * \param[in] counter_id Global Identifier of the counter (0 to DMA_NUM_COUNTERS-1) + * + * \return The DMA Counter Local Id (0 to DMA_NUM_COUNTERS_PER_GROUP-1) + * + */ +__INLINE__ int DMA_CounterGetLocalId( + int counter_id + ) +{ + return( counter_id % DMA_NUM_COUNTERS_PER_GROUP ); +} + + + + +__END_DECLS + + +#endif diff --git a/arch/powerpc/include/spi/DMA_Descriptors.h b/arch/powerpc/include/spi/DMA_Descriptors.h new file mode 100644 index 00000000000000..ae9fc11721ffa7 --- /dev/null +++ b/arch/powerpc/include/spi/DMA_Descriptors.h @@ -0,0 +1,1505 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ *
+ ********************************************************************/
+
+#ifndef _DMA_DESCRIPTORS_H_ /* Prevent multiple inclusion */
+#define _DMA_DESCRIPTORS_H_
+
+/*!
+ * \file spi/DMA_Descriptors.h
+ *
+ * \brief DMA SPI Descriptor Definitions and Inline Functions
+ *
+ * This header file contains the definition of the DMA_InjDescriptor_t, which is
+ * put into the tail of an injection fifo to initiate a DMA transfer.
+ *
+ * The following defines the terms used for describing the various kinds of
+ * descriptors:
+ * - "Torus" means the transfer is between nodes.
+ * - "Local" means the transfer is within the same node.
+ * - "Direct-put" means the data is put directly into the destination node's
+ * memory.
+ * - "MemFifo" means the packets are put into the destination node's reception
+ * fifo.
+ * - "Remote-get" means the packet payload contains an injection descriptor
+ * to be injected into the destination node's injection fifo.
+ * - "Prefetch-only" means the payload is just pre-fetched into L3. It is not
+ * transferred to the destination node.
+ *
+ * The following are the functions provided for creating injection descriptors:
+ * - DMA_TorusDirectPutDescriptor
+ * - DMA_LocalDirectPutDescriptor
+ * - DMA_LocalPrefetchOnlyDescriptor
+ * - DMA_TorusRemoteGetDescriptor
+ * - DMA_LocalRemoteGetDescriptor
+ * - DMA_TorusMemFifoDescriptor
+ * - DMA_LocalMemFifoDescriptor
+ * - DMA_TorusDirectPutBcastDescriptor
+ * - DMA_TorusMemFifoBcastDescriptor
+ *
+ *
+ * There are also functions for setting or changing specific values in the
+ * injection descriptors.
+ *
+ */
+
+
+
+
+#include <common/namespace.h>
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ * placed in the translation unit (eg. xxx.c) for use within that unit.
+ * The function is not externalized for use by another unit...we want this + * so we don't end up with multiple units exporting the same function, + * which would result in linker errors. + * + * Option 2: + * A GNU C model: Use "extern inline" in a common header (this one) and provide + * a definition in a .c file somewhere, perhaps using macros to ensure that the + * same code is used in each case. For instance, in the header file: + * + * \verbatim + #ifndef INLINE + # define INLINE extern inline + #endif + INLINE int max(int a, int b) { + return a > b ? a : b; + } + \endverbatim + * + * ...and in exactly one source file (in runtime/SPI), that is included in a + * library... + * + * \verbatim + #define INLINE + #include "header.h" + \endverbatim + * + * This allows inlining, where possible, but when not possible, only one + * instance of the function is in storage (in the library). + */ +#ifndef __INLINE__ +#define __INLINE__ extern inline +#endif + + + + +#include <bpcore/bgp_types.h> +#include <common/alignment.h> +#include <common/bgp_bitnumbers.h> +#include <spi/DMA_Packet.h> +#include <spi/DMA_Assert.h> + + + + +/*! + * \brief Packet Header - Checksum Skip Bytes + * + * Default number of 2 byte units to skip from the top of a packet before + * including the packet bytes into the running checksum of the torus + * injection fifo where this packet is injected. + * + * 8 corresponds to skipping 16 bytes, which is the DMA packet header size + * (hardware header + software header). + */ +#define DMA_CSUM_SKIP 8 + + +/*! + * \brief Packet Header - Checksum Skip Packet + * + * Default value for the torus injection checksum skip packet bit. + * - 0 includes the packet (excluding the portion designated by DMA_CSUM_SKIP) + * in the checksum. + * - 1 excludes the entire packet from the checksum. + */ +#define DMA_CSUM_BIT 0 + + + + +/*! 
+ * \brief DMA Injection Descriptor Structure + * + */ +typedef struct DMA_InjDescriptor_t +{ + union { + unsigned word1; /*!< For accessing fields as 32-bit word */ + + struct { + unsigned rsvd0 : 24; /*!< 3 bytes: unused */ + + unsigned rsvd1 : 6; /*!< Bits 0-5: unused flags */ + + unsigned prefetch_only : 1; /*!< Bit 6: prefetch only, on local + memcopy: + 0 = Data is both read and written, + 1 = Data is only read. + This bit is ignored for torus + packets. */ + + unsigned local_memcopy : 1; /*!< Bit 7: local memory copy bit: + 0 = The message is a torus message, + 1 = The message is a local copy. */ + }; + }; + + union { + unsigned word2; /*!< For accessing fields as 32-bit word */ + + struct { + unsigned rsvd2 : 24; /*!< 3 bytes: unused */ + + unsigned idma_counterId : 8; /*!< 1 byte: Injection Counter Id. */ + }; + }; + + unsigned base_offset : 32; /*!< 4 bytes: pointer to base address of + message payload. This gets added to + the base address associated with the + idma_counterId injection counter. */ + + unsigned msg_length : 32; /*!< 4 bytes: message length (in bytes) */ + + DMA_PacketHeader_t hwHdr; /*!< DMA Hardware Packet Header */ + +} +DMA_InjDescriptor_t ALIGN_QUADWORD; +/*! + * \todo Change to ALIGN_L1D_CACHE when it works. + * + */ + + +/*! + * \brief Static Info from Personality + * + * The following structure defines information from the personality. + * It is intended to be static so, once the info is retrieved from + * the personality, it does not need to be retrieved again (it is a + * system call to retrieve personality info). + * + */ +typedef struct DMA_PersonalityInfo_t +{ + unsigned int personalityRetrieved; /*!< 0 = Personality Info not + retrieved into this + structure yet. + 1 = Personality Info in this + structure is valid. */ + uint8_t nodeXCoordinate; /*!< X coord of the calling node. */ + uint8_t nodeYCoordinate; /*!< Y coord of the calling node. */ + uint8_t nodeZCoordinate; /*!< Z coord of the calling node. 
*/ + uint8_t xNodes; /*!< X dimension of the block. */ + uint8_t yNodes; /*!< Y dimension of the block. */ + uint8_t zNodes; /*!< Z dimension of the block. */ +} +DMA_PersonalityInfo_t; + + +/*! + * \brief Create a DMA Descriptor For a Torus Direct Put Message + * + * A torus direct put message is one that is sent to another node and its data + * is directly put into memory by the DMA on the destination node...it does + * not go into a reception fifo. + * + * A torus direct-put DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = DMA_CSUM_SKIP. + * - Sk = DMA_CSUM_BIT. + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 1 (Indicates a direct-put packet). + * - Dynamic = Set based on caller's "vc". + * - VC = Set to caller's "vc". + * - X,Y,Z = Set to caller's "x", "y", "z". + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = Destination message offset (from the reception + * counter's base address). Set to caller's recv_offset. + * - rDMA_Counter = Reception counter ID. This counter is located on the + * destination node and contains the base address of the + * message and the message length. Set based on caller's + * recv_ctr_grp_id and recv_ctr_id. + * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA. + * - Flags = Pacing = 0. + * Remote-Get = 0. 
+ * - iDMA_Fifo_ID = 0 (not used). + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] x The destination's x coordinate (8 bits). + * \param[in] y The destination's y coordinate (8 bits). + * \param[in] z The destination's z coordinate (8 bits). + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Both of x+ and x- cannot be set at the same + * time...same with y and z. + * \param[in] vc The virtual channel that the packet must go + * into if it fails to win the bypass + * arbitration in the receiving node. + * - 0 = Virtual channel dynamic 0 + * - 1 = Virtual channel dynamic 1 + * - 2 = Virtual channel deterministic bubble + * - 3 = Virtual channel deterministic priority + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_ctr_grp_id Reception counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] recv_ctr_id Reception counter ID (within the recv counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] recv_offset Offset of the payload from the pa_base + * associated with the specified reception + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, all payload bytes are included in the torus injection + * checksum. 
In the first byte of the torus hardware packet header, + * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0. + * The defaults can be changed by changing DMA_CSUM_SKIP and + * DMA_CSUM_BIT in this include file. + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_ctr_grp_id: + * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. + */ +int DMA_TorusDirectPutDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int x, + unsigned int y, + unsigned int z, + unsigned int hints, + unsigned int vc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_ctr_grp_id, + unsigned int recv_ctr_id, + unsigned int recv_offset, + unsigned int msg_len + ); + + +/*! + * \brief Create a DMA Descriptor For a Local Direct Put Message + * + * A local direct put message is one that is targeted within the same node, and + * its data is directly put into memory by the DMA...it does not go into a + * reception fifo. This is essentially a memcpy via DMA. 
+ *
+ * A local direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ * - Hint = 0 (not used).
+ * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id".
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a direct-put packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = Destination message offset (from the reception
+ * counter's base address). Set to caller's recv_offset.
+ * - rDMA_Counter = Reception counter ID. This counter is located on the
+ * destination node and contains the base address of the
+ * message and the message length. Set based on caller's
+ * recv_ctr_grp_id and recv_ctr_id.
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_ctr_grp_id Reception counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] recv_ctr_id Reception counter ID (within the recv counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] recv_offset Offset of the payload from the pa_base + * associated with the specified reception + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_ctr_grp_id: + * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1) + * The only use for the pid bits is for debug, ie, if headers are + * being saved. + */ +int DMA_LocalDirectPutDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_ctr_grp_id, + unsigned int recv_ctr_id, + unsigned int recv_offset, + unsigned int msg_len + ); + + +/*! + * \brief Create a DMA Descriptor For a Local L3 Prefetch Only Message + * + * A local prefetch is one in which the DMA simply prefetches the send buffer + * into L3. 
+ * + * A local prefetch DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 1 + * - local_memcopy = 1 + * - idma_counterId = Injection counter ID associated with the message being + * prefetched. This counter contains the base address of + * the message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = 0 (not used). + * - Sk = 0 (not used). + * - Hint = 0 (not used). + * - Pid0, Pid1 = 0 (not used). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 1 (Indicates a DMA packet). + * - Dynamic = 0 (not used). + * - VC = 0 (not used). + * - X,Y,Z = 0 (not used). + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = 0 (not used). + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. 
For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + */ +int DMA_LocalPrefetchOnlyDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int msg_len + ); + + +/*! + * \brief Create a DMA Descriptor For a Torus Remote-Get Message + * + * A torus remote-get message is one that is sent to another node and its data + * is directly put by the DMA into an injection fifo on the destination + * node...it does not go into a reception fifo. Therefore, the payload of this + * message is one (or more) descriptors for another message that is to be sent + * back to the originating node. + * + * By default, we assume that the payload of this remote get packet is a single + * descriptor. Thus, Chunks = (2)-1 (64 byte packet) and msg_length = 32. + * For remote gets whose payload is greater than 1 descriptor, the caller can + * change the packet Chunks and msg_length after this function builds the + * default descriptor. + * + * It is also assumed that the payload is NOT checksummed, since it is not + * always reproducible. Things like idma_counterId and base_offset may be + * different on another run, making checksumming inconsistent. + * + * A torus remote-get DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = 32. 
+ * + * - 8 byte torus hardware header + * - CSum_Skip = 0 (not used because Sk is 1). + * - Sk = 1 (do not checksum this packet). + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_inj_fifo_id" (see note). + * - Chunks = Set to (2)-1 = 1. + * - Dm = 1 (Indicates a DMA packet). + * - Dynamic = Set based on caller's "vc". + * - VC = Set to caller's "vc". + * - X,Y,Z = Set to caller's "x", "y", "z". + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA. + * - Flags = Pacing = 0. + * Remote-Get = 1. + * - iDMA_Fifo_ID = Injection fifo ID where the payload will be injected. + * Set based on caller's recv_inj_ctr_grp_id and + * recv_inj_ctr_id. + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] x The destination's x coordinate (8 bits). + * \param[in] y The destination's y coordinate (8 bits). + * \param[in] z The destination's z coordinate (8 bits). + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Both of x+ and x- cannot be set at the same + * time...same with y and z. + * \param[in] vc The virtual channel that the packet must go + * into if it fails to win the bypass + * arbitration in the receiving node. + * - 0 = Virtual channel dynamic 0 + * - 1 = Virtual channel dynamic 1 + * - 2 = Virtual channel deterministic bubble + * - 3 = Virtual channel deterministic priority + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). 
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_inj_fifo_grp_id Injection fifo group ID where payload + * will be injected on destination node + * (0 to DMA_NUM_INJ_FIFO_GROUPS-1). + * \param[in] recv_inj_fifo_id Injection fifo ID (within the + * recv_inj_fifo_grp_id group) + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_inj_fifo_grp_id: + * - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. + */ +int DMA_TorusRemoteGetDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int x, + unsigned int y, + unsigned int z, + unsigned int hints, + unsigned int vc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_inj_fifo_grp_id, + unsigned int recv_inj_fifo_id + ); + + +/*! + * \brief Create a DMA Descriptor For a Local Remote-Get Message + * + * A local remote-get message is one whose data is directly put by the DMA into + * an injection fifo on the local node...it does not go into a reception fifo. + * Therefore, the payload of this message is one (or more) descriptors for + * another message that is to be injected on the local node. 
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor. Thus, Chunks = (2)-1 (64 byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * A local remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = 32.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ * - Hint = 0 (not used).
+ * - Pid0, Pid1 = Set based on caller's "recv_inj_fifo_id" (see note).
+ * - Chunks = Set to (2)-1 = 1.
+ * - Dm = 1 (Indicates a DMA packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 1.
+ * - iDMA_Fifo_ID = Injection fifo ID where the payload will be injected.
+ * Set based on caller's inj_ctr_grp_id and inj_ctr_id.
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_inj_fifo_grp_id Injection fifo group ID where payload + * will be injected on local node + * (0 to DMA_NUM_INJ_FIFO_GROUPS-1). + * \param[in] recv_inj_fifo_id Injection fifo ID (within the + * recv_inj_fifo_grp_id group) + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the + * hardware packet header are determined by the recv_inj_fifo_grp_id: + * - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1) + * + */ +int DMA_LocalRemoteGetDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_inj_fifo_grp_id, + unsigned int recv_inj_fifo_id + ); + + +/*! + * \brief Create a DMA Descriptor For a Torus Memory Fifo Message + * + * A torus memory fifo message is one that is sent to another node and its data + * is put into a reception memory fifo by the DMA on the destination node. + * + * A torus memory fifo DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = DMA_CSUM_SKIP. + * - Sk = DMA_CSUM_BIT. 
+ * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 0 (Indicates a memory fifo packet). + * - Dynamic = Set based on caller's "vc". + * - VC = Set to caller's "vc". + * - X,Y,Z = Set to caller's "x", "y", "z". + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = 0 (not used). + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg. + * - Func_Id = The registration ID of a function to receive control + * on the destination node to process the packet. + * Set to caller's function_id. + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] x The destination's x coordinate (8 bits). + * \param[in] y The destination's y coordinate (8 bits). + * \param[in] z The destination's z coordinate (8 bits). + * \param[in] recv_fifo_grp_id Reception fifo group ID + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Both of x+ and x- cannot be set at the same + * time...same with y and z. + * \param[in] vc The virtual channel that the packet must go + * into if it fails to win the bypass + * arbitration in the receiving node. + * - 0 = Virtual channel dynamic 0 + * - 1 = Virtual channel dynamic 1 + * - 2 = Virtual channel deterministic bubble + * - 3 = Virtual channel deterministic priority + * \param[in] sw_arg User-defined 24 bits to be placed into the + * packets (bits 8-31). 
+ * \param[in] function_id Function id (8 bit registration ID) of the + * function to receive control on the + * destination node to process packets for this + * message. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, all payload bytes are included in the torus injection + * checksum. In the first byte of the torus hardware packet header, + * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0. + * The defaults can be changed by changing DMA_CSUM_SKIP and + * DMA_CSUM_BIT in this include file. + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_fifo_grp_id: + * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. 
+*/ +int DMA_TorusMemFifoDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int x, + unsigned int y, + unsigned int z, + unsigned int recv_fifo_grp_id, + unsigned int hints, + unsigned int vc, + unsigned int sw_arg, + unsigned int function_id, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int msg_len + ); + + +/*! + * \brief Create a DMA Descriptor For a Local Memory Fifo Message + * + * A local memory fifo message is one whose data is put into a reception + * memory fifo on the same node by the DMA. + * + * A local memory fifo DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = 0 (not used). + * - Sk = 0 (not used). + * - Hint = 0 (not used). + * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 0 (Indicates a memory fifo packet). + * - Dynamic = 0 (not used). + * - VC = 0 (not used). + * - X,Y,Z = 0 (not used). + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = 0 (not used). + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg. + * - Func_Id = The registration ID of a function to receive control + * on this local node to process the packet. + * Set to caller's function_id. 
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] recv_fifo_grp_id Reception fifo group ID
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] sw_arg User-defined 24 bits to be placed into the
+ * packets (bits 8-31).
+ * \param[in] function_id Function id (8 bit registration ID) of the
+ * function to receive control on this
+ * local node to process packets for this
+ * message.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for memory fifo DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_fifo_grp_id:
+ * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+*/
+int DMA_LocalMemFifoDescriptor(
+ DMA_InjDescriptor_t *desc,
+ unsigned int recv_fifo_grp_id,
+ unsigned int sw_arg,
+ unsigned int function_id,
+ unsigned int inj_ctr_grp_id,
+ unsigned int inj_ctr_id,
+ unsigned int send_offset,
+ unsigned int msg_len
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Broadcast Message + * + * A torus direct put broadcast message is one that is sent to all of the nodes + * in a specified direction along a specified line, its data + * is directly put into memory on the nodes along that line by the DMA on those + * nodes...it does not go into a reception fifo. Only one hint bit can be + * specified, dictating the direction (plus or minus) and line (x, y, or z). + * + * By default, the packet is included in the checksum. Retransmitted packets + * should not be included in the checksum. + * + * By default, the deterministic bubble normal virtual channel is used. + * + * A torus direct-put broadcast DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = DMA_CSUM_SKIP. + * - Sk = DMA_CSUM_BIT. + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 1 (Indicates a direct-put packet). + * - Dynamic = 0 (Deterministic). + * - VC = Virtual Channel: Deterministic Bubble Normal. + * - X,Y,Z = Set according to the hints: + * Two of the directions are set to this node's + * coordinates (no movement in those directions). + * One direction is set to the dest specified + * by the caller. + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = Destination message offset (from the reception + * counter's base address). 
Set to caller's recv_offset. + * - rDMA_Counter = Reception counter ID. This counter is located on the + * destination node and contains the base address of the + * message and the message length. Set based on caller's + * recv_ctr_grp_id and recv_ctr_id. + * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA. + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] dest The final torus destination coordinate + * along the line specified by the hints. + * Should not exceed the number of nodes in + * the direction of travel. + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Only one bit may be specified. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_ctr_grp_id Reception counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] recv_ctr_id Reception counter ID (within the recv counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] recv_offset Offset of the payload from the pa_base + * associated with the specified reception + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, all payload bytes are included in the torus injection + * checksum. 
In the first byte of the torus hardware packet header, + * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0. + * The defaults can be changed by changing DMA_CSUM_SKIP and + * DMA_CSUM_BIT in this include file. + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_ctr_grp_id: + * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. +*/ +int DMA_TorusDirectPutBcastDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int dest, + unsigned int hints, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_ctr_grp_id, + unsigned int recv_ctr_id, + unsigned int recv_offset, + unsigned int msg_len + ); + + +/*! + * \brief Create a DMA Descriptor For a Torus Memory Fifo Broadcast Message + * + * A torus memory fifo broadcast message is one that is sent to all of the nodes + * in a specified direction along a specified line, its data is + * put into a reception memory fifo by the DMA on the destination nodes along + * that line. Only one hint bit can be specified, dictating the direction + * (plus or minus) and line (x, y, or z). 
+ * + * By default, the packet is included in the checksum. Retransmitted packets + * should not be included in the checksum. + * + * By default, the deterministic bubble normal virtual channel is used. + * + * A torus memory fifo broadcast DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = DMA_CSUM_SKIP. + * - Sk = DMA_CSUM_BIT. + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 0 (Indicates a memory fifo packet). + * - Dynamic = 0 (Deterministic). + * - VC = Virtual Channel: Deterministic Bubble Normal. + * - X,Y,Z = Set according to the hints: + * Two of the directions are set to this node's + * coordinates (no movement in those directions). + * One direction is set to the dest specified + * by the caller. + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = 0 (not used). + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg. + * - Func_Id = The registration ID of a function to receive control + * on the destination node to process the packet. + * Set to caller's function_id. + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. 
+ * \param[in] dest The final torus destination coordinate + * along the line specified by the hints. + * Should not exceed the number of nodes in + * the direction of travel. + * \param[in] recv_fifo_grp_id Reception fifo group ID + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Only one bit may be specified. + * \param[in] sw_arg User-defined 24 bits to be placed into the + * packets (bits 8-31). + * \param[in] function_id Function id (8 bit registration ID) of the + * function to receive control on the + * destination node to process packets for this + * message. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, all payload bytes are included in the torus injection + * checksum. In the first byte of the torus hardware packet header, + * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0. + * The defaults can be changed by changing DMA_CSUM_SKIP and + * DMA_CSUM_BIT in this include file. + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). 
+ * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_fifo_grp_id: + * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. +*/ +int DMA_TorusMemFifoBcastDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int dest, + unsigned int recv_fifo_grp_id, + unsigned int hints, + unsigned int sw_arg, + unsigned int function_id, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int msg_len + ); + + +/*! + * \brief Set or Change the Hint Bits in a Fifo Descriptor + * + * \param[in,out] desc Pointer to descriptor to be set or changed. + * \param[in] hints Hint bits to be set. + * + * \return None + * + */ +__INLINE__ void DMA_SetHints( + DMA_InjDescriptor_t *desc, + unsigned int hints + ) +{ + SPI_assert( desc != NULL ); + desc->hwHdr.Hint = hints; + +} + + +/*! + * \brief Set or Change the Virtual Channel and Dynamic Bit in a Descriptor + * + * \param[in,out] desc Pointer to descriptor to be set or changed. + * \param[in] vc Input virtual channel + * - 0 = Virtual channel dynamic 0 + * - 1 = Virtual channel dynamic 1 + * - 2 = Virtual channel deterministic bubble + * - 3 = Virtual channel deterministic priority + * + * \return None + * + * \post The Dynamic bit is set according to the specified virtual channel. 
+ * + */ +__INLINE__ void DMA_SetVc( + DMA_InjDescriptor_t *desc, + unsigned int vc + ) +{ + SPI_assert( desc != NULL ); + + switch(vc) { + case DMA_PACKET_VC_D0: + case DMA_PACKET_VC_D1: + desc->hwHdr.Dynamic =1; + break; + + case DMA_PACKET_VC_BN: + case DMA_PACKET_VC_BP: + desc->hwHdr.Dynamic =0; + break; + + default: + SPI_assert(0); + } + desc->hwHdr.VC = vc; + +} + + +/*! + * \brief Set Descriptor Pid Bits + * + * Given a pointer to the descriptor and the receive-side counter group number, + * set the Pid0 and Pid1 bits in the torus hardware header portion of the + * descriptor. + * + * \param[in] desc Pointer to injection descriptor + * \param[in] g Reception-side counter group number + * (0 through DMA_NUM_COUNTER_GROUPS). + * + * \return None + * + */ +__INLINE__ void DMA_SetDescriptorPids( + DMA_InjDescriptor_t *desc, + unsigned int g + ) +{ + /* Set the pid bits according to the group id g */ + desc->hwHdr.Pid0 = _GN(g,30); + desc->hwHdr.Pid1 = _GN(g,31); +/* --------------------------------- + The above code performs the following: + + switch(g) { + case 0: + desc->hwHdr.Pid0 = 0; + desc->hwHdr.Pid1 = 0; + break; + + case 1 + desc->hwHdr.Pid0 = 0; + desc->hwHdr.Pid1 = 1; + break; + + case 2 + desc->hwHdr.Pid0 = 1; + desc->hwHdr.Pid1 = 0; + break; + + case 3 + desc->hwHdr.Pid0 = 1; + desc->hwHdr.Pid1 = 1; + break; + + default: + SPI_assert(0); + + } + --------------------------------- */ +} + + +/*! + * \brief Set or Change the Number of Chunks in a Fifo Descriptor + * + * \param[in,out] desc Pointer to the descriptor to be set or + * changed. + * \param[in] packet_chunks Number of 32B chunks in the packet + * (1 through 8). + * + * \return None + * + */ +__INLINE__ void DMA_SetChunks( + DMA_InjDescriptor_t *desc, + unsigned int packet_chunks + ) +{ + SPI_assert( desc != NULL ); + SPI_assert( packet_chunks >=1); + SPI_assert( packet_chunks <=8); + desc->hwHdr.Chunks = (packet_chunks-1) ; +} + + +/*! 
+ * \brief Set or Change the Message Length in a Fifo Descriptor + * + * \param[in,out] desc Pointer to the descriptor to be set or changed. + * \param[in] msg_len Number of bytes in the payload of the message. + * + * \return None + * + */ +__INLINE__ void DMA_SetMessageLength( + DMA_InjDescriptor_t *desc, + unsigned int msg_len + ) +{ + SPI_assert( desc != NULL ); + + desc->msg_length= msg_len; +} + + +/*! + * \brief Change the Checksum Characteristics in a Fifo Descriptor + * + * \param[in,out] desc Pointer to the descriptor to be changed. + * \param[in] csum_skip The number of 2-bytes to skip in the checksum + * (7 bits). + * \param[in] skip The checksum skip attribute: + * 0 = The packets participates in the injection + * checksum. + * 1 = The packet does not participate in the + * injection checksum. + * + * \return None + * + */ +__INLINE__ void DMA_SetInjCsum( + DMA_InjDescriptor_t *desc, + unsigned int csum_skip, + unsigned int skip + ) +{ + SPI_assert( desc != NULL ); + SPI_assert( skip <=1 ); + + desc->hwHdr.CSum_Skip = csum_skip; + desc->hwHdr.Sk = skip; + +} + + +/*! + * \brief Determine the Number of Packet Chunks for the First Packet of a + * Message + * + * Compute the best (largest) packet size in units of 32B chunks given the + * message length. + * + * \param[in] msg_len Message length + * + * \retval numPacketChunks Number of 32B chunks needed in the first packet + * of a message whose length is msg_len. + * This will be a number from 1 through 8. + * \retval 0 This is considered an error, resulting from a + * msg_len = 0. The DMA must send at least 1 byte. 
+ */
+__INLINE__ int DMA_PacketChunks(
+ unsigned msg_len
+ )
+{
+ /* Do most common case first */
+ if (msg_len > 208) return 8;
+
+ /* Error case...the DMA must send at least one byte of data */
+ SPI_assert( msg_len > 0);
+
+ /* Basically add in the packet header and round to 32B multiple */
+ {
+ int chunks = ( msg_len - 1 + sizeof(DMA_PacketHeader_t) ) / 32;
+ return (1+chunks);
+ }
+
+}
+
+
+/*!
+ * \brief Zero Out All Fields of a Descriptor
+ *
+ * \param[in] desc Pointer to descriptor to be zero'd.
+ *
+ * \post The descriptor is zero'd.
+ *
+ */
+__INLINE__ void DMA_ZeroOutDescriptor(
+ DMA_InjDescriptor_t *desc
+ )
+{
+ /*
+ * Possible optimizations:
+ * There are 32 bytes in the descriptor and it should be L1 aligned.
+ * SPI_assert(( desc & 0x000000FF) == 0); // check alignment, not needed if can't
+ * // easily use double hummer.
+ * _bgp_dcache_zero_line(desc); //Not allowed with SWOA
+ * Should be a better way to do this.
+ */
+
+ SPI_assert( desc != NULL );
+
+ {
+ int *addr = (int *) desc ;
+
+ /* Generates 8 stw's */
+ addr[0] = 0;
+ addr[1] = 0;
+ addr[2] = 0;
+ addr[3] = 0;
+ addr[4] = 0;
+ addr[5] = 0;
+ addr[6] = 0;
+ addr[7] = 0;
+ }
+
+}
+
+
+
+/*!
+ * \brief Update the Offset and Length in a Descriptor
+ *
+ * \param[in] desc Pointer to descriptor to be updated.
+ * \param[in] offset The new offset value.
+ * \param[in] length The new length value.
+ *
+ * \post The descriptor is updated.
+ *
+ */
+__INLINE__ void DMA_DescriptorUpdateOffsetLength (DMA_InjDescriptor_t *desc,
+ unsigned offset,
+ unsigned length)
+{
+ desc->base_offset = offset;
+ desc->msg_length = length;
+}
+
+
+
+/*!
+ * \brief Set the Put Offset in a Descriptor
+ *
+ * This sets the "put_offset" field of the software packet header in the
+ * provided descriptor. This field is placed into the packet header by
+ * the DMA. In the first packet, this field is placed into the packet
+ * unchanged.
In each subsequent packet, the DMA adds to this field + * the number of payload bytes from the previous packet. + * + * \param[in] desc Pointer to descriptor. + * \param[in] offset The offset value to be set. + * + * \post The Put Offset in the descriptor is set. + * + */ +__INLINE__ void DMA_DescriptorSetPutOffset (DMA_InjDescriptor_t *desc, + unsigned offset) +{ + desc->hwHdr.Put_Offset = offset; +} + +__END_DECLS + +#endif diff --git a/arch/powerpc/include/spi/DMA_Fifo.h b/arch/powerpc/include/spi/DMA_Fifo.h new file mode 100644 index 00000000000000..c8e7f9e0e66805 --- /dev/null +++ b/arch/powerpc/include/spi/DMA_Fifo.h @@ -0,0 +1,1011 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ + +#ifndef _DMA_FIFO_H_ /* Prevent multiple inclusion */ +#define _DMA_FIFO_H_ + + +/*! + * \file spi/DMA_Fifo.h + * + * \brief DMA SPI Fifo Definitions and Inline Functions Common to Injection + * and Reception Fifos + * + * This include file contains data structures and inline functions that are + * common among injection and reception fifos. The inlines are used to + * interface with the fifos at the lowest level. + * + * There are two levels of access: hardware and software. 
For direct + * hardware access, the DMA_FifoHW_t structure describes fields that reside + * in the "hardware fifo" in DMA SRAM. For normal software access, the + * DMA_Fifo_t structure contains a pointer to the hardware structure, + * shadows (snapshot copies) of the fields in the hardware structure, and + * size information calculated from the shadows. + * + * \verbatim Picture of fifo structures + + ========DDR MEMORY===================|==========DMA SRAM MEMORY========== + ------------------------------ | + | DMA_Fifo_t | | + | | | + | Software Fifo | | + | | | + | | | ----------------------------- + | fifo_hw_ptr--------------|-------|---->| DMA_FifoHW_t | + | | | | | + | | | | Hardware Fifo | + | Shadow Pointers | | ----------------------------- + | . | | + ------------------------------ | + + \endverbatim + * + * For normal messaging software, one should access the DMA using the + * DMA_Fifo_t, DMA_InjFifo_t, or DMA_RecFifo_t structures since + * they maintain shadows. This include file contains inline functions that + * operate on the DMA_Fifo_t for this purpose. Functions include: + * - get va_start, va_head, va_tail, va_end, fifo size, fifo free_space + * - set va_head, va_tail + * - update fifo free-space based upon current shadows + * + * However, for bringup or diagnostic software, there is a need for direct + * access to the hardware fifos. This include file contains functions that + * operate on the DMA_FifoHW_t for this purpose. Functions include: + * - get pa_start, pa_head, pa_tail, pa_end + * - set pa_start, pa_head, pa_tail, pa_end + * While it probably doesn't make sense to have a stand-alone + * DMA_FifoSetStartPa() or DMA_FifoSetEndPa() since this dynamically + * messes up the fifo, causing unpredictable results. But bringup or + * diagnostic software will need this (with dma disabled, or the fifo + * disabled). Therefore we provide direct interfaces using physical + * addresses and no shadows (for speed). 
+ * + * Definitions: + * - A fifo represents a contiguous block of DDR memory + * - A fifo has a starting address and an ending address (defines the memory + * block) + * - An injection fifo is a series of 32-byte descriptors. + * - Injection consists of copying a 32-byte descriptor into the next available + * slot (pointed to by the tail) and incrementing the tail pointer. + * - The DMA engine asynchronously processes descriptors, beginning with the + * descriptor pointed to by head, and ending with the descriptor just prior + * to tail. + * - There are injection (DMA InjFifo) and reception (DMA RecFifo) fifos + * (separate interfaces) + * - There are DMA_NUM_INJ_FIFO_GROUPS injection fifo groups + * - There are DMA_NUM_INJ_FIFOS_PER_GROUP injection fifos per group + * - Thus, there are DMA_NUM_INJ_FIFOS injection fifos per node + * - There are DMA_NUM_REC_FIFO_GROUPS reception fifo groups + * - There are DMA_NUM_REC_FIFOS_PER_GROUP reception fifos per group + * - Thus, there are DMA_NUM_REC_FIFOS reception fifos per node + * - A "shadow" refers to a copy of the elements of the fifo (start, end, head, + * tail) that is maintained by these inline functions. The shadows may be + * used to calculate other values such as free space. The shadows are updated + * by these inlines whenever the hardware fifo is read or written. + * + * \note Memory consistency/coherency inside these inlines is achieved using + * mbar and msync. + * + * MBAR is used to make sure that all writes to memory issued by the + * calling core have been accepted by the memory system before + * continuing. This guarantees that writes and reads to/from different + * addresses to go in defined order. + * + * MBAR EXAMPLE 1: When a store is done to DMA SRAM, it may not complete + * for a period of time. If a counter value is set, and then an injection + * fifo tail pointer is set, DMA may see the tail pointer update and begin + * the operation before the counter value has been set. 
Inserting an mbar + * between the setting of the counter and the setting of the tail pointer + * guarantees that the counter will be set before the tail pointer is + * updated. + * + * MBAR EXAMPLE 2: A counter hits zero. We process the hit-zero and write + * a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero + * status (different address). The hit-zero status will still indicate + * that it hit zero, even though we have already processed it, unless an + * mbar is inserted between clearing the hit-zero and reading the hit-zero + * status. + * + * MBAR PHILOSOPHY: After DMA SRAM is updated in the DMA inline functions, + * they always do at least an mbar (possibly an msync instead...see below). + * + * MSYNC does what mbar does, plus ensures consistency across cores. That + * is, it waits for snoops (invalidations of L1 cache) on the other cores + * to complete before continuing. This guarantees that all of the cores + * will see a consistent view of memory after the msync. + * + * MSYNC EXAMPLE: When a reception counter has hit zero, we assume the + * DMA'd data is available to be read by any core. However, old copies of + * that data may still be in the L1 caches. Inserting an msync after + * detecting that a counter has hit zero guarantees that the old data has + * been removed from the L1 caches. + * + * MSYNC PHILOSOPHY: After the inline functions detect that a counter has + * hit zero, they always do an msync. + * + * SPECULATIVE EXECUTION OF MSYNC: There are cases where msync is done + * conditionally. The CPU will begin execution of both sides of the + * condition before the result of the condition has been determined. + * Then, it will cancel the execution of one side once the result of the + * condition has been determined. This speculation is unwanted when + * the first instruction on one side of the condition is msync because + * cancelling an msync is similar to executing the complete msync. 
+ * To avoid this speculative execution of msync, we call + * _bgp_msync_nonspeculative(). This will trick the CPU so it won't begin + * the msync until the result of the condition is known. + * + * CALLER ADVICE: Users of these functions should not need to do + * mbar/msync themselves, unless they are doing something like the + * following: Read a counter and operate on the result when the counter + * hasn't reached zero. The caller will need to perform an msync after + * reading the counter in order to ensure that snoops have completed + * on all CPUs before operating on the DMA'd data. + */ + + +#include <common/namespace.h> + +__BEGIN_DECLS + + +/*! + * \brief __INLINE__ definition + * + * Option 1: + * Make all functions be "static inline": + * - They are inlined if the compiler can do it + * - If the compiler does not inline it, a single copy of the function is + * placed in the translation unit (eg. xxx.c)for use within that unit. + * The function is not externalized for use by another unit...we want this + * so we don't end up with multiple units exporting the same function, + * which would result in linker errors. + * + * Option 2: + * A GNU C model: Use "extern inline" in a common header (this one) and provide + * a definition in a .c file somewhere, perhaps using macros to ensure that the + * same code is used in each case. For instance, in the header file: + * + \verbatim + #ifndef INLINE + # define INLINE extern inline + #endif + INLINE int max(int a, int b) { + return a > b ? a : b; + } + \endverbatim + * + * ...and in exactly one source file (in runtime/SPI), that is included in a + * library... + * + \verbatim + #define INLINE + #include "header.h" + \endverbatim + * + * This allows inlining, where possible, but when not possible, only one + * instance of the function is in storage (in the library). + */ +#ifndef __INLINE__ +#define __INLINE__ extern inline +#endif + + + + +#include <spi/DMA_Assert.h> +#include <spi/kernel_interface.h> + + + +/*! 
+ * \brief Number of fifo groups + */ +#define DMA_NUM_FIFO_GROUPS 4 + + +/*! + * \brief Hardware DMA Fifo + * + * This maps the hardware fifo (the DMA SRAM) for a fifo. These fields are + * common for injection and reception fifos. + * + * The fifo represents a physically contiguous block of memory. + * + */ +typedef struct DMA_FifoHW_t +{ + volatile unsigned pa_start; /*!< RW fifo start address. + 16B-aligned 4-bit shifted address. */ + + volatile unsigned pa_end; /*!< RW fifo end address. + 16B-aligned 4-bit shifted address. */ + + volatile unsigned pa_head; /*!< RW fifo head pointer. + 16B-aligned 4-bit shifted address. + Injection fifo head moved by DMA. + Reception fifo head moved by cores. + Remote get injection fifo head moved + by DMA. */ + + volatile unsigned pa_tail; /*!< RW fifo tail pointer. + 16B-aligned 4-bit shifted address. + Injection fifo tail moved by cores. + Reception fifo tail moved by DMA. + Remote get injection fifo tail moved + by DMA. */ +} +DMA_FifoHW_t; + + +/*! + * \brief Software DMA Fifo structure + * + * This structure contains a pointer to the hardware fifo, and other fields that + * describe software's view of the fifo. These fields are common for injection + * and reception fifos. + * + * \todo Some more careful thought should be given how to group these so as to + * get best memory system performance. + * eg. Probably want to ALIGN_L3_CACHE the fifo_hw_ptr. + * Might want to have an assert to check that sizeof( DMA_Fifo_t) + * is 32. + * COMMENT: I think below definition puts the entire structure in one + * L1 line. + */ +typedef struct DMA_Fifo_t +{ + DMA_FifoHW_t *fifo_hw_ptr; /*!< Pointer to hardware fifo. */ + + unsigned int free_space; /*!< Shadow of how much free space is in the + fifo, in units of 16B quads. */ + + unsigned int fifo_size; /*!< Shadow of how much total space is in the + fifo, in units of 16B quads. */ + + unsigned int pa_start; /*!< Physical address of the start. 
(shadow) + 16B-aligned 4-bit shifted address. + Enables simple calculation of va_head, + va_tail, and va_end. */ + /*! + * \note The following 4 fields are shadows of the hardware fifo. + * They should be in the same L1 cache line for performance. + * They are updated by the inline functions in this file upon each + * read or write to the fifo. + */ + void *va_start; /*!< Shadow of the virtual address start of + the fifo. Must be 32B aligned. */ + + void *va_head; /*!< Shadow of the virtual address head of + the fifo. */ + + void *va_tail; /*!< Shadow of the virtual address tail of + the fifo. */ + + void *va_end; /*!< Shadow of the virtual address end of + the fifo. Must be 32B aligned. */ + +} +/*! + * With above, there should be 8 fields x 4 bytes/field = 32 bytes in the + * structure. Below ensures these 32 bytes are in the same cache line. + */ +ALIGN_L1D_CACHE DMA_Fifo_t; + +/* + *------------------------------------------------------------------------------ + * The following functions operate on fields in the hardware and software fifo + * structures. + *------------------------------------------------------------------------------ + */ + + +/*! + * \brief Update DMA Fifo Free Space from the Shadow + * + * Force a recalculation of a DMA fifo's amount of free space, given a software + * fifo structure. + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \return None + * + * \note WARNING: The calculation is based on the current shadow values of the + * head and tail, not the actual hardware values. + * + */ +__INLINE__ void DMA_FifoUpdateFreeSpaceFromShadow( + DMA_Fifo_t *f_ptr + ) +{ + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + /* + * Recompute the amount of free space in the fifo, given the current shadows. 
+ */ + + if ( f_ptr->va_tail >= f_ptr->va_head) + { + f_ptr->free_space = f_ptr->fifo_size - + ( ( (unsigned)(f_ptr->va_tail) - + (unsigned)(f_ptr->va_head) ) >> 4 ); + } + else + { + f_ptr->free_space = ( (unsigned)(f_ptr->va_head) - + (unsigned)(f_ptr->va_tail) ) >> 4; + } + +} + + +/*! + * \brief Get DMA Fifo Start Virtual Address from the Shadow + * + * Get a DMA fifo's "start" virtual address, given a software fifo structure + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval va_start The virtual address of the start of the fifo + * + * \note WARNING: This function does not read the DMA SRAM, but instead returns + * the current shadow va_start. To actually issue a read to the + * DMA, use DMA_FifoGetStartPa(). + */ +__INLINE__ void * DMA_FifoGetStartFromShadow( + DMA_Fifo_t *f_ptr + ) +{ + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + return f_ptr->va_start; +} + + +/*! + * \brief Get DMA Fifo Head Virtual Address + * + * Get a DMA fifo's "head" virtual address, given a software fifo structure + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval va_head The virtual address of the head of the fifo + * + * \post va_head is recalculated from the current hardware head, updated in + * the software fifo structure, and returned. Additionally, the free + * space in the software fifo structure is updated. + * + */ +__INLINE__ void * DMA_FifoGetHead( + DMA_Fifo_t *f_ptr + ) +{ + unsigned int val; + + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + /* Read the DMA to get the head. + * Recompute va_head based upon the va_start and the current hardware head. + * Update free_space. + */ + + val = f_ptr->fifo_hw_ptr->pa_head; + + f_ptr->va_head = (char*)( (unsigned)f_ptr->va_start + + ( ( val - f_ptr->pa_start ) << 4 ) ); + + DMA_FifoUpdateFreeSpaceFromShadow( f_ptr ); + + return f_ptr->va_head; + +} + + +/*! 
+ * \brief Get DMA Fifo Head Virtual Address Without Updating Free Space + * + * Get a DMA fifo's "head" virtual address, given a software fifo structure, + * without updating the fifo's free space. It is up to the caller to ensure + * this update occurs later, if necessary. + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval va_head The virtual address of the head of the fifo + * + * \post va_head is recalculated from the current hardware head, updated in + * the software fifo structure, and returned. + * + */ +__INLINE__ void * DMA_FifoGetHeadNoFreeSpaceUpdate( + DMA_Fifo_t *f_ptr + ) +{ + unsigned int val; + + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + /* Read the DMA to get the head. + * Recompute va_head based upon the va_start and the current hardware head. + */ + + val = f_ptr->fifo_hw_ptr->pa_head; + + f_ptr->va_head = (char*)( (unsigned)f_ptr->va_start + + ( ( val - f_ptr->pa_start ) << 4 ) ); + + return f_ptr->va_head; + +} + + +/*! + * \brief Get DMA Fifo Tail Virtual Address + * + * Get a DMA fifo's "tail" virtual address, given a software fifo structure + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval va_tail The virtual address of the tail of the fifo + * + * \post va_tail is recalculated from the current hardware tail, updated in + * the software fifo structure, and returned. Additionally, the free + * space in the software fifo structure is updated. + * + */ +__INLINE__ void * DMA_FifoGetTail( + DMA_Fifo_t *f_ptr + ) +{ + unsigned int val; + + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + /* Read the DMA to get the tail. + * Recompute va_tail based upon the va_start and the current hardware tail. + * Update free_space. 
+ */ + + val = f_ptr->fifo_hw_ptr->pa_tail; + + f_ptr->va_tail = (char*)( (unsigned)f_ptr->va_start + + ( ( val - f_ptr->pa_start ) << 4 ) ); + + DMA_FifoUpdateFreeSpaceFromShadow( f_ptr ); + + return f_ptr->va_tail; + + +} + + +/*! + * \brief Get DMA Fifo Tail Virtual Address Without Updating Free Space + * + * Get a DMA fifo's "tail" virtual address, given a software fifo structure, + * without updating the fifo's free space. It is up to the caller to + * invoke DMA_FifoUpdateFreeSpaceFromShadow() at a later time. + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval va_tail The virtual address of the tail of the fifo + * + * \post va_tail is recalculated from the current hardware tail, updated in + * the software fifo structure, and returned. + * + */ +__INLINE__ void * DMA_FifoGetTailNoFreeSpaceUpdate( + DMA_Fifo_t *f_ptr + ) +{ + unsigned int val; + + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + /* Read the DMA to get the tail. + * Recompute va_tail based upon the va_start and the current hardware tail. + */ + + val = f_ptr->fifo_hw_ptr->pa_tail; + + f_ptr->va_tail = (char*)( (unsigned)f_ptr->va_start + + ( ( val - f_ptr->pa_start ) << 4 ) ); + + return f_ptr->va_tail; + +} + + +/*! + * \brief Get DMA Fifo Tail Virtual Address from Shadow + * + * Get a DMA fifo's "tail" virtual address, given a software fifo structure + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval va_tail The virtual address of the tail of the fifo + * + * \post va_tail is obtained from the shadow, NOT recalculated from the + * current hardware tail. The free space in the software fifo + * structure is NOT updated. + * + */ +__INLINE__ void * DMA_FifoGetTailFromShadow( + DMA_Fifo_t *f_ptr + ) +{ + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + return f_ptr->va_tail; + +} + + +/*! 
+ * \brief Get DMA Fifo End Virtual Address from the Shadow + * + * Get a DMA fifo's "end" virtual address, given a software fifo structure + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval va_end The virtual address of the end of the fifo + * + * \note WARNING: This function does not read the DMA SRAM, but instead returns + * the current shadow va_end. To actually issue a read to the + * DMA, use DMA_FifoGetEndPa(). + */ +__INLINE__ void * DMA_FifoGetEndFromShadow( + DMA_Fifo_t *f_ptr + ) +{ + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + return f_ptr->va_end; +} + + +/*! + * \brief Get DMA Fifo Size + * + * Get a DMA fifo's size, given a software fifo structure + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval size The size of the DMA fifo, in units of 16B quads. + * + * \note WARNING: This function does not calculate the size based on the DMA + * SRAM's current start and end values, but instead returns the + * size that was calculated when the fifo was initialized. + */ +__INLINE__ unsigned int DMA_FifoGetSize( + DMA_Fifo_t *f_ptr + ) +{ + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + + return f_ptr->fifo_size; +} + + +/*! + * \brief Get DMA Fifo Free Space With No Update Calculation + * + * Get a DMA fifo's amount of free space, given a software fifo structure. + * Do not perform update calculations. + * + * \param[in] f_ptr Pointer to the software fifo structure + * + * \retval freeSpace The amount of free space in the fifo, in units of + * 16B quads. + */ +__INLINE__ unsigned int DMA_FifoGetFreeSpaceNoUpdateCalculation( + DMA_Fifo_t *f_ptr + ) +{ + SPI_assert( f_ptr != NULL ); + + return f_ptr->free_space; +} + + +/*! 
+ * \brief Get DMA Fifo Free Space + * + * Get a DMA fifo's amount of free space, given a software fifo structure + * + * \param[in] f_ptr Pointer to the software fifo structure + * \param[in] read_head Indicates whether to read the head from the hardware + * fifo before calculating the free space. + * - 1 means to read the hardware head + * - 0 means to use the current head shadow + * \param[in] read_tail Indicates whether to read the tail from the hardware + * fifo before calculating the free space. + * - 1 means to read the hardware tail + * - 0 means to use the current tail shadow + * + * \retval freeSpace The amount of free space in the fifo, in units of + * 16B quads. + * + * \note If both read_head and read_tail are false, the amount of free space is + * calculated based on the current shadow values of head and tail. + */ +__INLINE__ unsigned int DMA_FifoGetFreeSpace( + DMA_Fifo_t *f_ptr, + unsigned int read_head, + unsigned int read_tail + ) +{ + SPI_assert( f_ptr != NULL ); + SPI_assert( f_ptr->fifo_hw_ptr != NULL ); + SPI_assert( read_head == 1 || read_head == 0 ); + SPI_assert( read_tail == 1 || read_tail == 0 ); + + /* + * If both read_head and read_tail are 0, return the current shadow. + * If read_head != 0, read the head of the fifo first and recompute free space. + * If read_tail != 0, read the tail of the fifo first and recompute free space. + */ + + if ( (read_head == 0) && ( read_tail == 0) ) + DMA_FifoUpdateFreeSpaceFromShadow( f_ptr); + else + { + if ( read_head == 1) DMA_FifoGetHead(f_ptr); /* This does an update */ + /* of the free space. */ + if ( read_tail == 1) DMA_FifoGetTail(f_ptr); /* This does an update */ + /* of the free space. */ + } + + return f_ptr->free_space; + +} + + +/*! 
+ * \brief Set DMA Fifo Head
+ *
+ * Set a DMA fifo's "head", given a software fifo structure
+ *
+ * \param[in] f_ptr Pointer to the software fifo structure
+ * \param[in] va_head Virtual address of the head to be set
+ *
+ * \return None
+ *
+ * \post va_head is set in both the hardware and software fifo structures,
+ * and the fifo free space is recalculated.
+ *
+ * \note Normally, for an injection fifo, the DMA manipulates the head, but in
+ * optimized persistent communications the core can do it if it is sure
+ * the fifo is empty at the time this is called.
+ */
+__INLINE__ void DMA_FifoSetHead(
+ DMA_Fifo_t *f_ptr,
+ void *va_head
+ )
+{
+ unsigned int pa_head;
+
+ SPI_assert( f_ptr != NULL );
+ SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+ SPI_assert( va_head >= f_ptr->va_start &&
+ va_head < f_ptr->va_end );
+
+ /*
+ * Calculate new pa_head based on the shadow pa_start and va_start.
+ */
+ pa_head = f_ptr->pa_start + ( ( (unsigned)va_head -
+ (unsigned)f_ptr->va_start ) >> 4 );
+
+ /*
+ * Set the hardware head
+ */
+ f_ptr->fifo_hw_ptr->pa_head = pa_head;
+ _bgp_mbar();
+
+ /*
+ * Update the software fifo structure's head and free space.
+ */
+ f_ptr->va_head = va_head;
+
+ DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+}
+
+
+/*!
+ * \brief Set DMA Fifo Tail
+ *
+ * Set a DMA fifo's "tail", given a software fifo structure
+ *
+ * \param[in] f_ptr Pointer to the software fifo structure
+ * \param[in] va_tail Virtual address of the tail to be set
+ *
+ * \return None
+ *
+ * \post va_tail is set in both the hardware and software fifo structures,
+ * and the fifo free space is recalculated.
+ *
+ */
+__INLINE__ void DMA_FifoSetTail(
+ DMA_Fifo_t *f_ptr,
+ void *va_tail
+ )
+{
+ unsigned int pa_tail;
+
+ SPI_assert( f_ptr != NULL );
+ SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+ SPI_assert( va_tail >= f_ptr->va_start &&
+ va_tail < f_ptr->va_end );
+
+ /*
+ * Calculate new pa_tail based on the shadow pa_start and va_start.
+ */
+ pa_tail = f_ptr->pa_start + ( ( (unsigned)va_tail -
+ (unsigned)f_ptr->va_start ) >> 4 );
+
+ /*
+ * Set the hardware tail
+ */
+ f_ptr->fifo_hw_ptr->pa_tail = pa_tail;
+ _bgp_mbar();
+
+ /*
+ * Update the software fifo structure's tail and free space.
+ */
+ f_ptr->va_tail = va_tail;
+
+ DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+}
+
+
+
+
+/*
+ *------------------------------------------------------------------------------
+ * The following functions operate directly on the hardware fifo. Normally,
+ * users should use the software fifo routines (previously defined), but for
+ * bringup or diagnostics, it may be desirable to use these.
+ *------------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Start
+ *
+ * Set a DMA fifo's "start", given a hardware fifo structure
+ *
+ * \param[in] fifo_hw_ptr Pointer to the hardware fifo structure
+ * \param[in] pa_start Physical address of the start to be set.
+ * 16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the fifo to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ */
+__INLINE__ void DMA_FifoSetStartPa(
+ DMA_FifoHW_t *fifo_hw_ptr,
+ unsigned int pa_start
+ )
+{
+ SPI_assert( fifo_hw_ptr != NULL );
+
+ fifo_hw_ptr->pa_start = pa_start;
+
+ _bgp_mbar();
+
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Head
+ *
+ * Set a DMA fifo's "head", given a hardware fifo structure
+ *
+ * \param[in] fifo_hw_ptr Pointer to the hardware fifo structure
+ * \param[in] pa_head Physical address of the head to be set.
+ * 16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the fifo to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ */
+__INLINE__ void DMA_FifoSetHeadPa(
+ DMA_FifoHW_t *fifo_hw_ptr,
+ unsigned int pa_head
+ )
+{
+ SPI_assert( fifo_hw_ptr != NULL );
+
+ fifo_hw_ptr->pa_head = pa_head;
+
+ _bgp_mbar();
+
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Tail
+ *
+ * Set a DMA fifo's "tail", given a hardware fifo structure
+ *
+ * \param[in] fifo_hw_ptr Pointer to the hardware fifo structure
+ * \param[in] pa_tail Physical address of the tail to be set.
+ * 16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the fifo to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ */
+__INLINE__ void DMA_FifoSetTailPa(
+ DMA_FifoHW_t *fifo_hw_ptr,
+ unsigned int pa_tail
+ )
+
+{
+ SPI_assert( fifo_hw_ptr != NULL );
+
+ fifo_hw_ptr->pa_tail = pa_tail;
+
+ _bgp_mbar();
+
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo End
+ *
+ * Set a DMA fifo's "end", given a hardware fifo structure
+ *
+ * \param[in] fifo_hw_ptr Pointer to the hardware fifo structure
+ * \param[in] pa_end Physical address of the end to be set.
+ * 16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the fifo to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ */
+__INLINE__ void DMA_FifoSetEndPa(
+ DMA_FifoHW_t *fifo_hw_ptr,
+ unsigned int pa_end
+ )
+{
+ SPI_assert( fifo_hw_ptr != NULL );
+
+ fifo_hw_ptr->pa_end = pa_end;
+
+ _bgp_mbar();
+
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo Start
+ *
+ * Get a DMA fifo's "start", given a hardware fifo structure
+ *
+ * \param[in] fifo_hw_ptr Pointer to the hardware fifo structure
+ *
+ * \retval pa_start Physical address of the fifo start.
+ * 16B-aligned 4-bit shifted physical address.
+ * + * \return None + * + */ +__INLINE__ unsigned int DMA_FifoGetStartPa( + DMA_FifoHW_t *fifo_hw_ptr + ) +{ + SPI_assert( fifo_hw_ptr != NULL ); + + return fifo_hw_ptr->pa_start; +} + + +/*! + * \brief Get DMA Hardware Fifo Head + * + * Get a DMA fifo's "head", given a hardware fifo structure + * + * \param[in] fifo_hw_ptr Pointer to the hardware fifo structure + * + * \retval pa_head Physical address of the fifo head. + * 16B-aligned 4-bit shifted physical address. + * + * \return None + * + */ +__INLINE__ unsigned int DMA_FifoGetHeadPa( + DMA_FifoHW_t *fifo_hw_ptr + ) +{ + SPI_assert( fifo_hw_ptr != NULL ); + + return fifo_hw_ptr->pa_head; +} + + +/*! + * \brief Get DMA Hardware Fifo Tail + * + * Get a DMA fifo's "tail", given a hardware fifo structure + * + * \param[in] fifo_hw_ptr Pointer to the hardware fifo structure + * + * \retval pa_tail Physical address of the fifo tail. + * 16B-aligned 4-bit shifted physical address. + * + * \return None + * + */ +__INLINE__ unsigned int DMA_FifoGetTailPa( + DMA_FifoHW_t *fifo_hw_ptr + ) +{ + SPI_assert( fifo_hw_ptr != NULL ); + + return fifo_hw_ptr->pa_tail; +} + + +/*! + * \brief Get DMA Hardware Fifo End + * + * Get a DMA fifo's "end", given a hardware fifo structure + * + * \param[in] fifo_hw_ptr Pointer to the hardware fifo structure + * + * \retval pa_end Physical address of the fifo end. + * 16B-aligned 4-bit shifted physical address. + * + * \return None + * + */ +__INLINE__ unsigned int DMA_FifoGetEndPa( + DMA_FifoHW_t *fifo_hw_ptr + ) +{ + SPI_assert( fifo_hw_ptr != NULL ); + + return fifo_hw_ptr->pa_end; +} + + +__END_DECLS + + +#endif diff --git a/arch/powerpc/include/spi/DMA_InjFifo.h b/arch/powerpc/include/spi/DMA_InjFifo.h new file mode 100644 index 00000000000000..777a21cceaa178 --- /dev/null +++ b/arch/powerpc/include/spi/DMA_InjFifo.h @@ -0,0 +1,2475 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ + +#ifndef _DMA_INJFIFO_H_ /* Prevent multiple inclusion */ +#define _DMA_INJFIFO_H_ + + +/*! + * \file spi/DMA_InjFifo.h + * + * \brief DMA SPI Injection Fifo Definitions and Inline Functions + * + * This include file contains inline functions that are used to interface with + * BG/P DMA injection fifos at the lowest level. + * Functions include + * - initialize + * - get fifo start, head, tail, end, size, free space, descriptor count + * - set fifo head, tail, start PA, head PA, tail PA, end PA + * - increment tail + * - inject descriptor(s) + * - query status: not empty, available, threshold crossed, activated, + * descriptor done. 
+ * - set status: clear threshold crossed, activate, deactivate + * + * Data structures are defined to manipulate the injection fifos: + * - An injection fifo group structure defining a group of injection fifos + * - Within the group are injection fifo structures + * - Within each injection fifo structure is a software fifo structure + * - Each software fifo structure points to its corresponding hardware + * fifo structure in the DMA SRAM + * + * \verbatim Picture of data structures: + + ========DDR MEMORY===================|==========DMA SRAM MEMORY========== + ------------------------------ | + | DMA_InjFifoGroup_t | | + | | | ----------------------------- + | status --------------------|-------|---->| DMA_InjFifoStatus_t | + | fifo[0..31] | | ----------------------------- + | ------------------------ | | + | | DMA_InjFifo_t | | | + | | | | | + | 0 | ------------------- | | | ----------------------------- + | | | DMA_Fifo_t |-|-|-------|---->| DMA_FifoHW_t | + | | ------------------- | | | ----------------------------- + | ------------------------ | | + | . | | + | . | | + | . | | + | ------------------------ | | + | | DMA_InjFifo_t | | | + | | | | | + |31 | ------------------- | | | ----------------------------- + | | | DMA_Fifo_t |-|-|-------|---->| DMA_FifoHW_t | + | | ------------------- | | | ----------------------------- + | ------------------------ | | + ------------------------------ | + + \endverbatim + * + * Definitions: + * - A fifo represents a contiguous block of DDR memory + * - A fifo has a starting address and an ending address (defines the memory + * block) + * - An injection fifo is a series of 32-byte descriptors. There is a count + * of the number of descriptors ever injected into this fifo. It will never + * wrap in the expected lifetime of a job. 
+ * - Injection consists of copying a 32-byte descriptor into the next available + * slot (pointed to by the tail), incrementing the tail pointer, and + * incrementing the descriptor count for the fifo. + * - The DMA engine asynchronously processes descriptors, beginning with the + * descriptor pointed to by head, and ending with the descriptor just prior + * to tail. + * - There are injection (DMA InjFifo) and reception (DMA RecFifo) fifos + * (separate interfaces) + * - There are DMA_NUM_INJ_FIFO_GROUPS injection fifo groups + * - There are DMA_NUM_INJ_FIFOS_PER_GROUP injection fifos per group + * - Thus, there are DMA_NUM_INJ_FIFOS injection fifos per node + * - There are DMA_NUM_REC_FIFO_GROUPS reception fifo groups + * - There are DMA_NUM_REC_FIFOS_PER_GROUP reception fifos per group + * - Thus, there are DMA_NUM_REC_FIFOS reception fifos per node + * - A "shadow" refers to a copy of the elements of the fifo (start, end, head, + * tail) that is maintained by these inline functions. The shadows may be + * used to calculate other values such as free space. The shadows are updated + * by these inlines whenever the hardware fifo is read or written. + * + * \note These functions do not try to detect things that software shouldn't do, + * like injecting a descriptor into a remote_get fifo, since the hardware + * doesn't distinguish between remote get fifos and normal injection + * fifos. That sort of checking should be done in a higher level. + * + * \note Memory consistency/coherency inside these inlines is achieved using + * mbar and msync. + * + * MBAR is used to make sure that all writes to memory issued by the + * calling core have been accepted by the memory system before + * continuing. This guarantees that writes and reads to/from different + * addresses to go in defined order. + * + * MBAR EXAMPLE 1: When a store is done to DMA SRAM, it may not complete + * for a period of time. 
If a counter value is set, and then an injection + * fifo tail pointer is set, DMA may see the tail pointer update and begin + * the operation before the counter value has been set. Inserting an mbar + * between the setting of the counter and the setting of the tail pointer + * guarantees that the counter will be set before the tail pointer is + * updated. + * + * MBAR EXAMPLE 2: A counter hits zero. We process the hit-zero and write + * a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero + * status (different address). The hit-zero status will still indicate + * that it hit zero, even though we have already processed it, unless an + * mbar is inserted between clearing the hit-zero and reading the hit-zero + * status. + * + * MBAR PHILOSOPHY: After DMA SRAM is updated in the DMA inline functions, + * they always do at least an mbar (possibly an msync instead...see below). + * + * MSYNC does what mbar does, plus ensures consistency across cores. That + * is, it waits for snoops (invalidations of L1 cache) on the other cores + * to complete before continuing. This guarantees that all of the cores + * will see a consistent view of memory after the msync. + * + * MSYNC EXAMPLE: When a reception counter has hit zero, we assume the + * DMA'd data is available to be read by any core. However, old copies of + * that data may still be in the L1 caches. Inserting an msync after + * detecting that a counter has hit zero guarantees that the old data has + * been removed from the L1 caches. + * + * MSYNC PHILOSOPHY: After the inline functions detect that a counter has + * hit zero, they always do an msync. + * + * SPECULATIVE EXECUTION OF MSYNC: There are cases where msync is done + * conditionally. The CPU will begin execution of both sides of the + * condition before the result of the condition has been determined. + * Then, it will cancel the execution of one side once the result of the + * condition has been determined. 
This speculation is unwanted when + * the first instruction on one side of the condition is msync because + * cancelling an msync is similar to executing the complete msync. + * To avoid this speculative execution of msync, we call + * _bgp_msync_nonspeculative(). This will trick the CPU so it won't begin + * the msync until the result of the condition is known. + * + * CALLER ADVICE: Users of these functions should not need to do + * mbar/msync themselves, unless they are doing something like the + * following: Read a counter and operate on the result when the counter + * hasn't reached zero. The caller will need to perform an msync after + * reading the counter in order to ensure that snoops have completed + * on all CPUs before operating on the DMA'd data. + * + * \note General discussion on injection fifo interrupts. Both the warning + * threshold crossed and full fifo interrupts... + * + * For remote gets, a fifo is considered available if it has at least 512 bytes + * free (32 16B quads). An arriving remote get can be written if there are 512 + * bytes free, but after that the available goes low and no further remote gets + * can be written to any fifo. Furthermore, if any injection fifo has less than + * 512 bytes free, the fifo becomes unavailable and any arriving remote get + * packet will cause an interrupt to fire and the rDMA will stop. + * + * Specifically, if an injection fifo has less than 512 B (by either injecting + * or remote gets) the iDMA will continue to operate and the rDMA will continue + * to operate until any remote get packet arrives to any fifo, at which point + * an interrupt fires and the rDMA stops. + * + * Note that these interrupts were put in for warnings of remote get fifos + * becoming nearly full. However the time between when the warning fires and the + * condition is cleared may be long, reconfiguring an almost full remote get + * fifo is difficult, and recovery from full remote get injection fifos is very + * difficult. 
Since software can prevent this, and since recovery is so + * difficult, we consider injection fifo threshold crossing interrupts and + * injection fifo full interrupts to be fatal. Thus there is no handler function + * in the injection fifo allocation routine. + * + * So software needs to manage injection and remote get fifo space so that there + * are always at least 512 bytes of free space in every fifo. To accomplish + * this, software needs to guarantee it won't inject descriptors if doing so + * would trigger an interrupt or make the fifo unavailable. + * + * This can be done by setting the interrupt threshold to 0 (interrupt fires if + * free space <= threshhold), and not injecting if after injection there are + * less than DMA_MIN_INJECT_SIZE_IN_QUADS (=32) slots. Furthermore, remote + * get space should not be allocated if doing so might result in strictly less + * than DMA_MIN_INJECT_SIZE_IN_QUADS slots. + * + */ + + + +#include <common/namespace.h> +/* #include <memory.h> */ + + +__BEGIN_DECLS + + +/*! + * \brief __INLINE__ definition + * + * Option 1: + * Make all functions be "static inline": + * - They are inlined if the compiler can do it + * - If the compiler does not inline it, a single copy of the function is + * placed in the translation unit (eg. xxx.c)for use within that unit. + * The function is not externalized for use by another unit...we want this + * so we don't end up with multiple units exporting the same function, + * which would result in linker errors. + * + * Option 2: + * A GNU C model: Use "extern inline" in a common header (this one) and provide + * a definition in a .c file somewhere, perhaps using macros to ensure that the + * same code is used in each case. For instance, in the header file: + * + \verbatim + #ifndef INLINE + # define INLINE extern inline + #endif + INLINE int max(int a, int b) { + return a > b ? 
a : b; + } + \endverbatim + * + * ...and in exactly one source file (in runtime/SPI), that is included in a + * library... + * + \verbatim + #define INLINE + #include "header.h" + \endverbatim + * + * This allows inlining, where possible, but when not possible, only one + * instance of the function is in storage (in the library). + */ +#ifndef __INLINE__ +#define __INLINE__ extern inline +#endif + + + +#include <spi/DMA_Assert.h> +#include <spi/DMA_Fifo.h> +#include <spi/DMA_Descriptors.h> + +/* + * You can save a few cycles by using the parallel floating point unit to do the 'memcpy' + * as part of injecting a descriptor into a FIFO; but you then need to quadword-align the source memory + * and you may need to save/restore the FP context. Setting k_use_fp_to_inject to 0 arranges for the + * generated code to use integer registers for the 'memcpy'. + */ +enum { + k_use_fp_to_inject = 0 +}; + + +/*! + * \brief Number of Injection Fifo Groups + */ +#define DMA_NUM_INJ_FIFO_GROUPS 4 + + +/*! + * \brief Number of Injection Fifos per Group + */ +#define DMA_NUM_INJ_FIFOS_PER_GROUP 32 + + +/*! + * \brief Number of Injection Fifos (total) + */ +#define DMA_NUM_INJ_FIFOS (DMA_NUM_INJ_FIFO_GROUPS*DMA_NUM_INJ_FIFOS_PER_GROUP) + + +/*! + * \brief Minimum Free Space Required After Injection + * + * This is the number of 16-byte quads that need to be free in a fifo after + * injection of a descriptor. + */ +#define DMA_MIN_INJECT_SIZE_IN_QUADS 32 + + +/*! + * \brief Number of 16-byte quads in a fifo descriptor + * + */ +#define DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS 2 + + +/*! + * \brief Number of bytes in a fifo descriptor + * + */ +#define DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS*16 + + +/*! + * \brief Minimum size of a fifo, somewhat arbitrary + */ +#define DMA_MIN_INJ_FIFO_SIZE_IN_BYTES (256*4) + + +/*! 
+ * \brief Injection DMA Fifo Structure + * + * This structure contains a software DMA fifo structure (defined in DMA_Fifo.h) + * and other fields that are specific to an injection fifo used by software. + * + * \todo Some more careful thought should be given how to group these so as to + * get best memory system performance. + * eg. Probably want to ALIGN_L3_CACHE the fifo_hw_ptr. + * + */ +typedef struct DMA_InjFifo_t +{ + DMA_Fifo_t dma_fifo; /*!< Common software fifo structure */ + unsigned short int fifo_id; /*!< The fifo identifier (0 to + DMA_NUM_INJ_FIFOS_PER_GROUP-1). */ + + unsigned long long desc_count; /*!< The number of descriptors that have + ever been injected into this fifo. */ + + unsigned int occupiedSize; /*!< The number of 16B quads in the fifo that + are logically occupied. This does not + include the DMA_MIN_INJECT_SIZE_IN_QUADS + that always remains logically occupied. */ + /*! + * \note The following fields contain info about the fifo that affects the + * DCR values configuring the fifo. + */ + unsigned short int priority; /*!< 0 = Normal priority, 1 = High priority. + The DMA uses this to determine which + injection fifo to serve next. + Reflected in DCR addresses + _BGP_DCR_iDMA_FIFO_PRIORITY(i), where i + is the group_id. 0xD32 - 0xD35. + Fifo j is high priority if bit j in the + DCR is 1, otherwise it is normal + priority. */ + + unsigned short int local; /*!< 0 = non-local, 1 = local. + If 0, this fifo uses the torus and + ts_inj_map must be non-zero. + If 1, this fifo is used for tranfsers + local to the node only. + Reflected in DCR addresses + _BGP_DCR_iDMA_LOCAL_COPY(i), where i + is the group_id. 0xD5C - 0xD5F. + Fifo j is for local transfers if bit j + in the DCR is 1, otherwise it is for + torus transfers. */ + + unsigned char ts_inj_map; /*!< 8 bit vector mask indicating which torus + fifos can be used by this DMA fifo. + Reflected in DCR addresses + _BGP_DCR_iDMA_TS_INJ_FIFO_MAP(k) where k + is the fifo_id. 0xD3C - 0xD5B. 
+ Fifo k can inject in torus fifo j if + bit j of the k'th DCR byte is 1. */ +} +DMA_InjFifo_t; + + +/*! + * \brief DMA Injection Fifo Status structure + * + * This structure maps the DMA SRAM for a particular group of + * DMA_NUM_INJ_FIFOS_PER_GROUP fifos. + * + */ +typedef struct DMA_InjFifoStatus_t +{ + volatile unsigned not_empty; /*!< R bitmask, 1 bit/fifo: + Injection FIFO not empty. */ + + volatile unsigned reserved_0; /*!< HOLE */ + + volatile unsigned available; /*!< R bitmask, 1 bit/fifo: + Injection FIFO available. */ + + volatile unsigned reserved_1; /*!< HOLE */ + + volatile unsigned threshold_crossed; /*!< R bitmask, 1 bit/fifo: + Threshold crossed. */ + + volatile unsigned reserved_2; /*!< HOLE */ + + volatile unsigned clear_threshold_crossed;/*!< W bitmask, 1 bit/fifo: + Clear threshold crossed. */ + + volatile unsigned reserved_3; /*!< HOLE */ + + volatile unsigned activated; /*!< R bitmask, 1 bit/fifo: + Retrieve activated fifos. */ + + volatile unsigned activate; /*!< W bitmask, 1 bit/fifo: + Set "1" to activate fifo. */ + + volatile unsigned deactivate; /*!< W bitmask, 1 bit/fifo: + Set "1" to deactivate fifo*/ +} +DMA_InjFifoStatus_t; + + +/*! + * \brief DMA Injection Fifo Group Structure + * + * This structure defines a DMA InjFifo Group. It points to a + * DMA InjFifo Status structure, and contains DMA_NUM_INJ_FIFOS_PER_GROUP + * DMA InjFifo structures. + * + * It is passed into the DMA_InjFifoGroupAllocate system call. + * The system call sets up the requested fifos, and fills in this fifo group + * structure, including the appropriate DMA InjFifo structures within it. + * + * It also contains permission bits to use the fifos, one bit per fifo. + * When the permission bit is on, the corresponding fifo belongs to this + * group and can be used. Otherwise, the fifo should not be used as part + * of this group. These permission bits are used as follows: + * 1. 
Inline functions will ASSERT when an attempt is made + * to use a fifo that is not part of this group. + * 2. Inline functions will use the permission bits as a mask + * to return status information only for fifos that are allocated + * to this group. + * + */ +typedef struct DMA_InjFifoGroup_t +{ + DMA_InjFifoStatus_t *status_ptr; /*!< Pointer to fifo status. */ + + DMA_InjFifo_t fifos[DMA_NUM_INJ_FIFOS_PER_GROUP];/*!< Array + of fifo structures. The i-th struct + is defined and usable only if + bit i of permissions = 1. */ + + unsigned int permissions; /*!< Permissions bit vector. Bit i is 1 + if permitted to use fifo i. The fifo + is allocated to this group. */ + + unsigned int group_id; /*!< The id of this group (0 to + DMA_NUM_INJ_FIFO_GROUPS-1). */ +} +DMA_InjFifoGroup_t; + + +/*! + * \brief Remote Get Fifo Full Handler Function Prototype + * + * A function with this signature receives control when one or more remote + * get fifos have filled. This function should do the following to help + * make space in the fifo(s): + * 1. Determine if there are any remote get fifos full or nearly full. + * 2. For each such fifo: + * 1. Allocate a larger fifo + * 2. Copy the descriptors from the old fifo to the new fifo + * 3. Call DMA_InjFifoInitById() to register the new fifo with the DMA + * 4. Call DMA_InjFifoSetTailById() to set the new fifo's tail pointer + * 5. Free the old fifo + * + * A function of this type can be registered on DMA_InjFifoGroupAllocate(). + * + * \param[in] fg_ptr Pointer to the fifo group associated with this fifo. + * \param[in] f_num The fifo number that has filled. This is + * relative to the DMA fifo group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * \param[in] handler_param An opaque pointer provided by the caller who + * registered this handler. + */ +typedef void (*DMA_InjFifoRgetFifoFullHandler_t)( + DMA_InjFifoGroup_t *fg_ptr, + int f_num, + void *handler_parm + ); + + +/*! 
+ * \brief Remote Get Fifo Full Handler Table Entry + * + * This defines an entry in the Remote Get Fifo Full Handler Table. + * It identifies the fifo group pointer associated with the full fifo, + * and the pointer to the handler function to receive control to handle + * the fifo full condition and the opaque pointer to be passed to the + * handler function when it is called. The core number of the core that + * will process the condition is associated with each entry. + */ +typedef struct DMA_InjFifoRgetFifoFullHandlerEntry_t +{ + DMA_InjFifoGroup_t *fg_ptr; /*!< Pointer to injection fifo group */ + DMA_InjFifoRgetFifoFullHandler_t handler; /*!< Pointer to handler function */ + void *handler_parm; /*!< Pointer to be passed to + the handler. */ + uint32_t core_num;/*!< Core number of the core that + will process the condition. */ +} DMA_InjFifoRgetFifoFullHandlerEntry_t; + + +/*! + * + * \brief Remote Get Fifo Full Handler Table + * + * An array of entries, one per injection fifo. Each entry specifies the fifo + * group structure and the handler function that will receive control to + * handle a remote get fifo full condition for fifos in that fifo group. + */ +extern DMA_InjFifoRgetFifoFullHandlerEntry_t DMA_RgetFifoFullHandlerTable[DMA_NUM_INJ_FIFOS]; + + +/*! + * \brief Remote Get Fifo Full Init Has Been Done Indicator + * + * 0 means the initialization has not been done. + * 1 means the initialization has been done. + */ +extern int DMA_InjFifoRgetFifoFullInitHasBeenDone; + + +/*! + * \brief Remote Get Fifo Full Initialization + * + * Initialize data structures and interrupt handlers to handle a remote get + * fifo full condition. + * + * \param[in] interruptGroup The handle that identifies the remote get fifo + * full interrupts (only one interrupt, in this + * case, group 3, irq 24). 
+ * \param[in] rget_barrier A function pointer to a function that implments + * the barrier that is used by the handler function + * to synchronize all cores in the node as they + * each handle the interrupt (it is a broadcasted + * interrupt). + * \param[in] rget_barrier_arg The generic arg to pass to the barrier function. + */ +void DMA_InjFifoRgetFifoFullInit( Kernel_InterruptGroup_t interruptGroup, + void (*rget_barrier)(void *), + void *rget_barrier_arg ); + + +/*! + * \brief Query Free DMA InjFifos within a Group + * + * This function is a wrapper around a system call that returns a list of the + * free (available to be allocated) fifos within the specified group. + * + * \param[in] grp Group number being queried + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1) + * \param[out] num_fifos Pointer to an int where the number of free + * fifos in the specified group is returned + * \param[out] fifo_ids Pointer to an array of num_fifos short ints where + * the list of free fifos is returned. + * Each short int is the fifo number + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * The caller must provide space for + * DMA_NUM_INJ_FIFOS_PER_GROUP ints, + * in case the entire fifo group is free. + * + * \retval 0 Successful. num_fifos and fifo_ids array set as described. + * \retval -1 Unsuccessful. errno gives the reason. + * + */ +__INLINE__ int DMA_InjFifoGroupQueryFree( + int grp, + int *num_fifos, + int *fifo_ids + ) +{ + return Kernel_InjFifoGroupQueryFree( grp, + (uint32_t*)num_fifos, + (uint32_t*)fifo_ids); +} + + +/*! + * \brief Allocate DMA InjFifos From A Group + * + * This function is a wrapper around a system call that allocates specified + * DMA injection fifos from the specified group. Parameters specify whether + * each fifo is high or normal priority, local or non-local, and which torus + * fifos it maps to. A DMA_InjFifoGroup_t structure is returned for + * use in other inline functions to operate on the allocated fifos. 
+ * + * Refer to the interrupt discussion at the top of this include file to see why + * there are no interrupt-related parameters. + * + * \param[in] grp Group number whose DMA injection fifos are being + * allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1) + * \param[in] num_fifos Number of fifos to be allocated from the group + * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP) + * \param[in] fifo_ids Pointer to an array of num_fifos ints where + * the list of fifos to be allocated is provided. + * Each int is the fifo number + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * \param[in] priorities Pointer to an array of num_fifos short ints where + * the list of priorities to be assigned to the fifos + * is provided. Each short int indicates the priority + * to be assigned to each of the fifos identified in + * the fifo_ids array (0 is normal, 1 is high priority). + * \param[in] locals Pointer to an array of num_fifos short ints where + * an indication is provided of whether each fifo will + * be used for local transfers (within the same node) + * or torus transfers. Each short int indicates the + * local/non-local attribute to be assigned to each of + * the fifos identified in the fifo_ids array (0 is + * non-local, 1 is local). If 0, the corresponding + * array element in ts_inj_maps indicates which torus + * fifos can be injected. + * \param[in] ts_inj_maps Pointer to an array of num_fifos chars where + * the torus fifos that can be injected are specified + * for each fifo. Each char specifies which of + * the 8 torus injection fifos can be injected when a + * descriptor is injected into the DMA injection fifo. + * Must be non-zero when the corresponding "locals" + * is 0. + * Bits 0-3 are for torus group 0. + * Bits 4-7 are for torus group 1. + * Bits 3 and 7 are the high priority fifos. + * \param[in] rget_handler Pointer to a function with prototype + * DMA_InjFifoRgetFifoFullHandler_t that will handle + * a remote get fifo full condition for fifos in this + * fifo group. 
If NULL is specified, the condition + * will not be handled. + * \param[in] rget_handler_parm A pointer to opaque storage that will be + * passed to the rget_handler. + * \param[in] rget_interruptGroup A InterruptGroup_t that identifies the + * group of interrupts that handle the remote get + * fifo full condition. It is only one interrupt: + * group 3, irq 24. + * \param[in] rget_barrier Function point to a function that implements + * a barrier that is used by the rget fifo full + * interrupt handler. This barrier should be across + * all cores of all active processes on this compute node. + * \param[in] rget_barrier_arg Generic arg to pass to barrier function. + * \param[out] fg_ptr Pointer to a structure that is filled in upon + * successful return for use in other inline functions + * to operate on the allocated fifos. + * \li fifos - Array of fifo structures. Structures + * for allocated fifos are initialized as + * documented below. Structures for + * fifos not allocated by this instance of + * this syscall are initialized to binary + * zeros. Allocated fifos are enabled. + * \li status_ptr - Points to status area within the + * DMA memory map. + * \li permissions - Bits indicating which fifos were + * allocated during this syscall. + * \li group_id - The id of this group. + * + * \retval 0 Successful. Fifos allocated and fg_ptr structure filled in as + * described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \return The group fifo structure pointed to by fg_ptr is completely + * initialized as follows: + * - status_ptr points to the appropriate fifo group DMA memory map + * - fifo structures array. Fifo structures for fifos not allocated + * during this syscall are initialized to binary zeros. Fifo + * structures for fifos allocated during this syscall are initialized: + * - fifo_hw_ptr points to the DMA memory map for this fifo. The + * hardware start, end, head, and tail are set to zero by the + * kernel. 
+ * - All other fields in the structure are set to zero by the kernel + * except priority, local, and ts_inj_map are set to reflect what + * was requested in the priorities, locals, and ts_inj_maps + * syscall parameters. + * + */ +__INLINE__ int DMA_InjFifoGroupAllocate( + int grp, + int num_fifos, + int *fifo_ids, + unsigned short int *priorities, + unsigned short int *locals, + unsigned char *ts_inj_maps, + DMA_InjFifoRgetFifoFullHandler_t rget_handler, + void *rget_handler_parm, + Kernel_InterruptGroup_t rget_interruptGroup, + void (*rget_barrier)(void *), + void *rget_barrier_arg, + DMA_InjFifoGroup_t *fg_ptr + ) +{ + int rc; + int i, global_fifo_id; + + rc = Kernel_InjFifoGroupAllocate( grp, + num_fifos, + (uint32_t*)fifo_ids, + (uint16_t*)priorities, + (uint16_t*)locals, + (uint8_t*)ts_inj_maps, + (uint32_t*)fg_ptr); + + if ( rc == 0 ) + { + /* + * If a remote get fifo full handler has been provided, update the table + * to indicate that this handler will handle full conditions on the fifos + * just allocated. + */ + if ( rget_handler ) + { + /* + * If rget handler init has not been done, do it: + */ + if ( DMA_InjFifoRgetFifoFullInitHasBeenDone == 0 ) + DMA_InjFifoRgetFifoFullInit( rget_interruptGroup, + rget_barrier, + rget_barrier_arg ); + + for (i=0; i<num_fifos; i++) + { + global_fifo_id = (grp * DMA_NUM_INJ_FIFOS_PER_GROUP) + fifo_ids[i]; + DMA_RgetFifoFullHandlerTable[global_fifo_id].fg_ptr = fg_ptr; + DMA_RgetFifoFullHandlerTable[global_fifo_id].handler = rget_handler; + DMA_RgetFifoFullHandlerTable[global_fifo_id].handler_parm = + rget_handler_parm; + DMA_RgetFifoFullHandlerTable[global_fifo_id].core_num= + Kernel_PhysicalProcessorID(); + } + + /* + * Indicate done with initialization. + */ + DMA_InjFifoRgetFifoFullInitHasBeenDone = 1; + } + } + + return(rc); +} + + +/*! + * \brief Free DMA InjFifos From A Group + * + * This function is a wrapper around a system call that frees DMA injection + * counters from the specified group. 
+ * + * \param[in] grp Group number whose DMA injection fifos are being + * freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1) + * \param[in] num_fifos Number of fifos to be freed from the group + * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP) + * \param[in] fifo_ids Pointer to an array of num_fifos ints where + * the list of fifos to be freed is provided. + * Each int is the fifo number (0 to num_fifos-1). + * \param[in] fg_ptr Pointer to the structure previously filled in when + * these fifos were allocated. Upon successful + * return, this structure is updated to reflect the + * freed fifos: + * \li fifos - Structures for freed fifos zero'd. + * Freed fifos are disabled. + * \li permissions - Bits cleared for each freed fifo. + * + * \retval 0 Successful. Fifos freed and fg_ptr structure updated as described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \note This is a fatal error if any of the fifos are non empty and activated + * + */ +__INLINE__ int DMA_InjFifoGroupFree( + int grp, + int num_fifos, + int *fifo_ids, + DMA_InjFifoGroup_t *fg_ptr + ) +{ + return Kernel_InjFifoGroupFree( grp, + num_fifos, + (uint32_t*)fifo_ids, + (uint32_t*)fg_ptr); +} + + + + +/* + * ----------------------------------------------------------------------------- + * Calls to access the Fifo, given a pointer to the injection fifo structure + * ----------------------------------------------------------------------------- + */ + + + + +/*! + * \brief Set DMA Injection Fifo Head + * + * Set a DMA injection fifo's "head", given an injection fifo structure + * + * \param[in] f_ptr Pointer to the injection fifo structure + * \param[in] va_head Virtual address of the head to be set + * + * \return None + * + * \post va_head is set in both the hardware and software fifo structures, + * and the fifo free space is recalculated. 
+ * + * \note Normally, for an injection fifo, the dma manipulates the head, but in + * optimized persistant communications the core can do it if it is sure + * the fifo is empty at the time this is called. + */ +__INLINE__ void DMA_InjFifoSetHead( + DMA_InjFifo_t *f_ptr, + void *va_head + ) +{ + SPI_assert( f_ptr != NULL ); + + DMA_FifoSetHead( &f_ptr->dma_fifo, + va_head ); +} + + +/*! + * \brief Increment DMA Injection Fifo Tail + * + * Increment a DMA injection fifo's "tail", given an injection fifo structure + * + * \param[in] f_ptr Pointer to the injection fifo structure + * \param[in] incr The number of quads (16 byte units) to increment the + * tail pointer by. This value must be even (ie. descriptors + * are 32 bytes). + * + * \retval None + * + * \post va_tail is set in both the hardware and software fifo structures, + * the fifo free space is recalculated, and the fifo's descriptor count + * is incremented according to the incr. + * + * \note This function does not check if there is free space in the fifo + * for this many quads. It must be preceeded by a check of the + * free space. + */ +__INLINE__ void DMA_InjFifoIncrementTail( + DMA_InjFifo_t *f_ptr, + unsigned int incr + ) +{ + SPI_assert( f_ptr != NULL ); + SPI_assert( (incr & 0x1) == 0 ); + + { + void *va_tail = DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ); + + void *va_end = DMA_FifoGetEndFromShadow( &f_ptr->dma_fifo ); + + unsigned int incr_bytes = incr << 4; + + unsigned int bytes_to_end = (unsigned)va_end - (unsigned)va_tail; + + /* + * Note: The following check must be >= instead of just >. We never want + * the tail to be equal to the end so we can always copy a descriptor + * to the tail, safely. 
+ */ + if ( incr_bytes >= bytes_to_end ) + { + va_tail = (char *) + ( (unsigned)DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo ) + + ( incr_bytes - bytes_to_end ) ); + } + else + { + va_tail = (char *)( (unsigned)va_tail + incr_bytes ); + } + + DMA_FifoSetTail( &f_ptr->dma_fifo, + va_tail ); + + f_ptr->desc_count += (incr >> 1); + } + +} + + +/*! + * \brief Get DMA Injection Fifo Descriptor Count + * + * Get a DMA injection fifo's "descriptor count", given an injection fifo + * structure + * + * \param[in] f_ptr Pointer to the injection fifo structure + * + * \retval desc_count The descriptor count for the specified fifo + * + */ +__INLINE__ unsigned long long DMA_InjFifoGetDescriptorCount( + DMA_InjFifo_t *f_ptr + ) +{ + SPI_assert( f_ptr != NULL ); + + return f_ptr->desc_count; +} + + +/*! + * \brief Is DMA Descriptor Done + * + * Return whether a specified descriptor is still in the specified injection + * fifo (not done). The descriptor is identified by the descriptor count + * immediately after the descriptor was injected into the fifo (returned by + * DMA_InjFifoIncrementTail(). + * + * \param[in] f_ptr Pointer to the injection fifo structure + * \param[in] desc_count The descriptor count immediately after the + * descriptor in question was injected into + * the fifo. + * \param[in] update 0 Do not update the fifo's shadow information. + * 1 Update the fifo's shadow information. + * It is a performance optimization to only update the + * shadow information once for a group of descriptors + * being processed. + * + * \retval 0 False. The descriptor identified by desc_count is not done. + * It is still in the fifo. + * \retval 1 True. The descriptor identified by desc_count is done. + * It is no longer in the fifo. 
 *
 */
__INLINE__ unsigned int DMA_InjFifoIsDescriptorDone(
                                    DMA_InjFifo_t      *f_ptr,
                                    unsigned long long  desc_count,
                                    unsigned int        update
                                                   )
{
  unsigned long long num_desc_in_fifo;
  unsigned int       free_space;
  DMA_Fifo_t        *fifo_ptr;

  SPI_assert( f_ptr != NULL );

  fifo_ptr = &(f_ptr->dma_fifo);

  /* If caller wants a fresh look in the fifo, update its free space.
   * Otherwise, fetch the free space based on shadows.
   */
  if (update)
    free_space = DMA_FifoGetFreeSpace (fifo_ptr, 1, 0);
  else
    free_space = DMA_FifoGetFreeSpaceNoUpdateCalculation(fifo_ptr);

  /* Compute the desc_count of the oldest descriptor in the fifo (minus 1)
   * Note:  Each desc is a 32B unit and the below are 16B entities
   */
  num_desc_in_fifo = ( DMA_FifoGetSize(fifo_ptr) - free_space ) / 2;

  /* Determine if the specified desc_count is still in the fifo.
   * We take the current descriptor count for this fifo and subtract the
   * number of descriptors still in the fifo.  This is the descriptor count
   * of the oldest descriptor still remaining in the fifo (minus 1).
   * We compare that with the caller's desc_count to determine if the
   * caller's descriptor is still in the fifo.
   */
  if ( desc_count <= (DMA_InjFifoGetDescriptorCount(f_ptr) - num_desc_in_fifo) )
    return (1);  /* Descriptor is done     */
  else
    return (0);  /* Descriptor is not done */

}


/*!
 * \brief DMA Injection Fifo Reserve Descriptor Storage
 *
 * Reserve storage in a DMA injection fifo for a remote get descriptor, given
 * an injection fifo structure.
 *
 * \param[in]  f_ptr  Pointer to the injection fifo structure
 *
 * \retval  0   Successful.    There was enough space in the fifo and the
 *                             storage was reserved.
 * \retval  -1  Unsuccessful.  There was not enough space in the fifo.
 *
 * \note  Internally, this increments the occupiedSize of the fifo by
 *        DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
 *
 */
__INLINE__ int DMA_InjFifoReserveDescriptorStorage(
                                                   DMA_InjFifo_t *f_ptr
                                                  )
{
  SPI_assert( f_ptr != NULL );

  /* Reserve only if, after the reservation, the minimum injectable free
   * space would still remain available.
   */
  if ( (DMA_FifoGetSize(&f_ptr->dma_fifo) - f_ptr->occupiedSize) >=
       (DMA_MIN_INJECT_SIZE_IN_QUADS + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) ) {
    f_ptr->occupiedSize += DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
    return (0);
  }
  else {
    return (-1);
  }
}


/*!
 * \brief DMA Injection Fifo Free Descriptor Storage Reservation
 *
 * Free a reservation for storage for a remote get descriptor in a DMA injection
 * fifo, previously reserved using DMA_InjFifoReserveDescriptorStorageById().
 *
 * \param[in]  f_ptr  Pointer to the injection fifo structure
 *
 * \return None
 *
 * \note  Internally, this decrements the occupiedSize of the fifo by
 *        DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
 *
 */
__INLINE__ void DMA_InjFifoFreeDescriptorStorageReservation(
                                                            DMA_InjFifo_t *f_ptr
                                                           )
{
  SPI_assert( f_ptr != NULL );
  /* Must not free more than is currently reserved. */
  SPI_assert( f_ptr->occupiedSize >= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS );

  f_ptr->occupiedSize -= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
}


/*!
 * \brief Check If An Injection Fifo Has Space For Injection
 *
 * Check if an injection fifo has enough space for a single descriptor to be
 * injected.
 *
 * \param[in]  f_ptr  Pointer to the injection fifo structure
 *
 * \retval  hasSpace  An indicator of whether the fifo has space for a
 *                    descriptor.
 *                    - 0 (false) means the fifo is full.
 *                    - 1 (true)  means the fifo has space.
+ * + */ +__INLINE__ unsigned int DMA_InjFifoHasSpace( + DMA_InjFifo_t *f_ptr + ) +{ + SPI_assert( f_ptr != NULL ); + + { + unsigned int free_space; + + /* Get the free space in the fifo using the shadow value */ + free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo, + 0, /* Use shadow head */ + 0);/* use shadow tail */ + + /* + * If after injecting, (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS is the amount we + * are going to inject), there is still at least the minimum allowable free + * space left in the fifo, go ahead and inject. We want at least + * DMA_MIN_INJECT_SIZE_IN_QUADS free space after injection. + * + * Otherwise, read the hardware head pointer and recalculate the free space, + * and check again. Note: We don't need to read the hardware tail + * pointer because only software updates that, and we recalculate the + * free space at that time. + * + * If there is still not enough room in the fifo, return 0, indicating that + * the descriptor could not be injected. + * + */ + if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS + + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS ) + { + free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo, + 1, /* Use hardware head */ + 0); /* Use shadow tail */ + + if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS + + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS ) return 0; + } + + return 1; /* There is space in the fifo. */ + } +} + +/* a 32-byte memcpy */ +static inline void DMA_DescriptorToFifo(char *store_ptr, const char *load_ptr) +{ + int * store_int=(int *) store_ptr ; + int * load_int= (int *) load_ptr ; + int v0 = load_int[0] ; + int v1 = load_int[1] ; + store_int[0] = v0 ; + v0 = load_int[2] ; + store_int[1] = v1 ; + v1 = load_int[3] ; + store_int[2] = v0 ; + v0 = load_int[4] ; + store_int[3] = v1 ; + v1 = load_int[5] ; + store_int[4] = v0 ; + v0 = load_int[6] ; + store_int[5] = v1 ; + v1 = load_int[7] ; + store_int[6] = v0 ; + store_int[7] = v1 ; +} +/*! 
 * \brief Inject a Descriptor into a DMA Injection Fifo Without Checking for
 *        Space
 *
 * Inject a descriptor into a DMA injection fifo, given an injection fifo
 * structure, without checking to see if there is enough space in the fifo.
 * It is assumed that the caller has already checked for enough space using
 * the DMA_InjFifoHasSpace() function.
 *
 * \param[in]  f_ptr     Pointer to the injection fifo structure
 * \param[in]  desc      A pointer to the descriptor to be injected.
 *                       Must be 16-byte aligned.
 *
 * \retval  numDescInjected  The number of descriptors injected.
 *                           - 1 means it was successfully injected.
 *
 * \see DMA_InjFifoHasSpace()
 */
__INLINE__ int DMA_InjFifoInjectDescriptorNoSpaceCheck(
                                                       DMA_InjFifo_t       *f_ptr,
                                                       DMA_InjDescriptor_t *desc
                                                      )
{
  SPI_assert( f_ptr != NULL );
  SPI_assert( desc  != NULL );

  {
    char *load_ptr, *store_ptr;

    /*
     * Copy the descriptor to the current va_tail of the fifo.
     * Msync to ensure the descriptor has been written to memory and the L1 caches
     * are in sync.
     * Move the tail past the descriptor so the DMA knows the descriptor is there.
     * - handle wrapping
     * - update free space
     *
     */

    if( k_use_fp_to_inject)
      {
        /* Fast path: copy the 32B descriptor through the FPU quad
         * load/store intrinsics when it is 16-byte aligned.
         */
        if ( ( (unsigned)desc & 0xFFFFFFF0 ) == (unsigned)desc ) /* 16B aligned? */
          {
            load_ptr  = (char*)desc;
            store_ptr = (char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo );
            _bgp_QuadLoad ( load_ptr,     0 );
            _bgp_QuadLoad ( load_ptr+16,  1 );
            _bgp_QuadStore( store_ptr,    0 );
            _bgp_QuadStore( store_ptr+16, 1 );
          }
        else
          {
            /* Unaligned descriptor: fall back to a plain memcpy. */
            memcpy( DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),
                    desc,
                    DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES );
          }
      }
    else
      {
        /* Integer-register copy variant (no FPU state touched). */
        DMA_DescriptorToFifo((char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),(char*)desc) ;
      }

    /* Barrier before publishing the new tail to the DMA engine.
     * _bgp_msync(); mbar is good enough
     */
    _bgp_mbar();

    DMA_InjFifoIncrementTail( f_ptr,
                              DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS );

    return 1;  /* Success */
  }
}


/*!
 * \brief Inject a Descriptor into a DMA Injection Fifo
 *
 * Inject a descriptor into a DMA injection fifo, given an injection fifo
 * structure
 *
 * \param[in]  f_ptr     Pointer to the injection fifo structure
 * \param[in]  desc      A pointer to the descriptor to be injected.
 *                       Must be 16-byte aligned.
 *
 * \retval  numDescInjected  The number of descriptors injected.
 *                           - 0 means it was not injected, most likely because
 *                             the fifo is full.
 *                           - 1 means it was successfully injected
 *
 * Caution:  If you call this function two or more times in quick
 * succession to try to put descriptors into a FIFO, occasionally
 * one of the descriptors appears not to be acted on by the hardware.
 * An alternative is to use DMA_InjFifoInjectDescriptors with a vector
 * of descriptors; this appears to do the job reliably.
 */
__INLINE__ int DMA_InjFifoInjectDescriptor(
                                           DMA_InjFifo_t       *f_ptr,
                                           DMA_InjDescriptor_t *desc
                                          )
{
  SPI_assert( f_ptr != NULL );
  SPI_assert( desc  != NULL );

  {
    unsigned int free_space;
    char *load_ptr, *store_ptr;

    /* Get the free space in the fifo using the shadow value */
    free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
                                       0, /* Use shadow head */
                                       0);/* use shadow tail */

    /*
     * If after injecting, (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS is the amount we
     * are going to inject), there is still at least the minimum allowable free
     * space left in the fifo, go ahead and inject.  We want at least
     * DMA_MIN_INJECT_SIZE_IN_QUADS free space after injection.
     *
     * Otherwise, read the hardware head pointer and recalculate the free space,
     * and check again.  Note:  We don't need to read the hardware tail
     * pointer because only software updates that, and we recalculate the
     * free space at that time.
     *
     * If there is still not enough room in the fifo, return 0, indicating that
     * the descriptor could not be injected.
     *
     */
    if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
                      DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS )
      {
        free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
                                           1,  /* Use hardware head */
                                           0); /* Use shadow tail   */

        if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
                          DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS ) return 0;
      }

    /*
     * We have enough room in the fifo.
     * Copy the descriptor to the current va_tail of the fifo.
     * Msync to ensure the descriptor has been written to memory and the L1 caches
     * are in sync.
     * Move the tail past the descriptor so the DMA knows the descriptor is there.
     * - handle wrapping
     * - update free space
     *
     */

    if( k_use_fp_to_inject)
      {
        /* Fast path: FPU quad load/store copy when 16-byte aligned. */
        if ( ( (unsigned)desc & 0xFFFFFFF0 ) == (unsigned)desc ) /* 16B aligned? */
          {
            load_ptr  = (char*)desc;
            store_ptr = (char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo );
            _bgp_QuadLoad ( load_ptr,     0 );
            _bgp_QuadLoad ( load_ptr+16,  1 );
            _bgp_QuadStore( store_ptr,    0 );
            _bgp_QuadStore( store_ptr+16, 1 );
          }
        else
          {
            memcpy( DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),
                    desc,
                    DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES );
          }
      }
    else
      {
        /* Integer-register copy variant (no FPU state touched). */
        DMA_DescriptorToFifo((char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),(char*)desc) ;
      }

    /* Barrier before publishing the new tail to the DMA engine.
     * _bgp_msync(); mbar is good enough
     */
    _bgp_mbar();

    DMA_InjFifoIncrementTail( f_ptr,
                              DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS );

    return 1;  /* Success */
  }
}


/*!
 * \brief Inject Multiple Descriptors into a DMA Injection Fifo
 *
 * Inject multiple descriptors into a DMA injection fifo, given an injection fifo
 * structure
 *
 * \param[in]  f_ptr     Pointer to the injection fifo structure
 * \param[in]  num_desc  Number of descriptors to be injected
 * \param[in]  desc      A pointer to an array of pointers to descriptors to be
 *                       injected.  The descriptors must be 16-byte aligned.
 *
 * \retval  numDescInjected  The number of descriptors injected.
+ * - less than num_desc means some were not injected, + * most likely because the fifo is full. + * - num_desc means all were successfully injected + * + */ +#if 0 +__INLINE__ int DMA_InjFifoInjectDescriptors( + DMA_InjFifo_t *f_ptr, + int num_desc, + DMA_InjDescriptor_t **desc + ) +{ + int i; + int rc=0 ; + for(i=0;i<num_desc;i+=1) + { + int rc0=DMA_InjFifoInjectDescriptor(f_ptr,desc[i]) ; + rc += rc0 ; + } + return rc ; +} +#else +__INLINE__ int DMA_InjFifoInjectDescriptors( + DMA_InjFifo_t *f_ptr, + int num_desc, + DMA_InjDescriptor_t **desc + ) +{ + unsigned int free_space; + unsigned int total_space_needed_in_quads; + void *va_tail; + void *va_end; + void *va_start; + char *target; + unsigned int num_quads_to_inject, num_quads_remaining; + int i; + char *load_ptr, *store_ptr; + + SPI_assert( f_ptr != NULL ); + SPI_assert( desc != NULL ); + SPI_assert( num_desc > 0 ); + + /* Get the free space in the fifo using the shadow value */ + free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo, + 0, /* Use shadow head */ + 0);/* Use shadow tail */ + + total_space_needed_in_quads = num_desc * + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS; + + /* + * If after injecting all descriptors (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS + * per descriptor is the amount we are going to inject), there is still at + * least the minimum allowable free space left in the fifo, go ahead and + * inject. We want at least DMA_MIN_INJECT_SIZE_IN_QUADS free space + * after injection. + * + * Otherwise, read the hardware head pointer and recalculate the free space, + * and check again. + * + * If there is still not enough room in the fifo for any descriptors, + * return 0. Otherwise, continue and inject as many descriptors as possible. 
+ * + */ + if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS + + total_space_needed_in_quads ) + { + free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo, + 1, /* Use hardware head */ + 0); /* Use shadow tail */ + + if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS + + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS ) return 0; + } + + /* + * We have enough room in the fifo for at least some descriptors. + * Copy the descriptors (as many as will fit) to the current va_tail of the + * fifo. + * Msync to ensure the descriptor has been written to memory and the L1 caches + * are in sync. + * Move the tail past the descriptor so the DMA knows the descriptor is there. + * - handle wrapping + * - update free space + * + */ + va_tail = DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ); + va_start = DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo ); + va_end = DMA_FifoGetEndFromShadow( &f_ptr->dma_fifo ); + target = (char*)va_tail; + + if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS + total_space_needed_in_quads ) { + num_quads_to_inject = free_space - DMA_MIN_INJECT_SIZE_IN_QUADS; + } + else { + num_quads_to_inject = total_space_needed_in_quads; + } + num_quads_remaining = num_quads_to_inject; + i = 0; + + while ( num_quads_remaining > 0 ) + { + SPI_assert( desc[i] != NULL ); + + if( k_use_fp_to_inject) + { + if ( ( (unsigned)desc[i] & 0xFFFFFFF0 ) == (unsigned)desc[i] ) /* 16B aligned? 
*/ + { + load_ptr = (char*)desc[i]; + store_ptr = (char*)target; + _bgp_QuadLoad ( load_ptr, 0 ); + _bgp_QuadLoad ( load_ptr+16, 1 ); + _bgp_QuadStore( store_ptr, 0 ); + _bgp_QuadStore( store_ptr+16, 1 ); + } + else + { + memcpy( (char*)target, + desc[i], + DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES ); + } + } + else + { + DMA_DescriptorToFifo(target,(char*)(desc[i])) ; + } + + i++; + num_quads_remaining -= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS; + target += DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES; + if ( target >= (char*)va_end ) + target = (char*)va_start; + } + + /* _bgp_msync(); mbar good enough */ + _bgp_mbar(); + + DMA_InjFifoIncrementTail( f_ptr, + num_quads_to_inject ); + + return i; /* Success */ + +} +#endif + +/*! + * \brief Get DMA Injection Fifo Group Number + * + * Get the DMA Injection Fifo Group number, given an injection fifo group + * structure. + * + * \param[in] fg_ptr Pointer to the structure previously filled in when the + * group was allocated. + * + * \return The DMA Injection Fifo Group number + * + */ +__INLINE__ int DMA_InjFifoGetGroupNum( + const DMA_InjFifoGroup_t *fg_ptr + ) +{ + SPI_assert( fg_ptr != NULL ); + + return fg_ptr->group_id; +} + + +/*! + * \brief Get the "Not Empty" Status of an Injection Fifo Group + * + * Get the "Not Empty" status of the fifos that the specified fifo group has + * permission to use. + * + * \param[in] fg_ptr Pointer to the injection fifo group structure + * + * \retval notEmptyStatus A 32-bit value, one bit per fifo. + * Bit i is 1 if the specified fifo group has + * permission to use fifo i and fifo i is not + * empty. + * Bit i is 0 if the specified fifo group either + * does not have permission to use fifo i, or fifo i + * is empty. + * + */ +__INLINE__ unsigned DMA_InjFifoGetNotEmpty( + DMA_InjFifoGroup_t *fg_ptr + ) +{ + SPI_assert( fg_ptr != NULL ); + SPI_assert( fg_ptr->status_ptr != NULL ); + + return ( fg_ptr->status_ptr->not_empty & fg_ptr->permissions ); + +} + + +/*! 
 * \brief Get the "available" Status of an Injection Fifo Group
 *
 * Get the "available" status of the fifos that the specified fifo group has
 * permission to use.  "available" means that the fifo is enabled and
 * activated.
 *
 * \param[in]  fg_ptr  Pointer to the injection fifo group structure
 *
 * \retval  availableStatus  A 32-bit value, one bit per fifo.
 *                           Bit i is 1 if the specified fifo group has
 *                           permission to use fifo i and fifo i is available
 *                           Bit i is 0 if the specified fifo group either
 *                           does not have permission to use fifo i, or fifo i
 *                           is not available.
 *
 * \note  Normally, there should be a 1 in every position except those that
 *        the specified fifo group does not have permission to use.
 *
 */
__INLINE__ unsigned DMA_InjFifoGetAvailable(
                                            DMA_InjFifoGroup_t *fg_ptr
                                           )
{
  SPI_assert( fg_ptr != NULL );
  SPI_assert( fg_ptr->status_ptr != NULL );

  /* Mask the hardware status down to the fifos this group may use. */
  return ( fg_ptr->status_ptr->available & fg_ptr->permissions );

}


/*!
 * \brief Get the "threshold crossed" Status of an Injection Fifo Group
 *
 * Get the "threshold crossed" status of the fifos that the specified fifo
 * group has permission to use.
 *
 * \param[in]  fg_ptr  Pointer to the injection fifo group structure
 *
 * \retval  thresholdCrossedStatus  A 32-bit value, one bit per fifo.
 *                           Bit i is 1 if the specified fifo group has
 *                           permission to use fifo i and fifo i has crossed
 *                           a threshold.
 *                           Bit i is 0 if the specified fifo group either
 *                           does not have permission to use fifo i, or fifo i
 *                           has not crossed a threshold.
 *
 * \note  Normally, there should be a 0 in every position.
 *
 */
__INLINE__ unsigned DMA_InjFifoGetThresholdCrossed(
                                                   DMA_InjFifoGroup_t *fg_ptr
                                                  )
{
  SPI_assert( fg_ptr != NULL );
  SPI_assert( fg_ptr->status_ptr != NULL );

  return ( fg_ptr->status_ptr->threshold_crossed & fg_ptr->permissions );

}


/*!
 * \brief Set the "clear threshold crossed" Status of an Injection Fifo Group
 *
 * Set the "clear threshold crossed" status of the fifos that the specified fifo
 * group has permission to use.
 *
 * \param[in]  fg_ptr  Pointer to the injection fifo group structure
 * \param[in]  clr     A 32-bit value, one bit per fifo.
 *                     Only bits that the specified fifo group has
 *                     permission to use should be set to 1.
 *                     Set bit i to 1 to clear the threshold crossed status
 *                     of fifo i.
 *
 * \return  None
 *
 */
__INLINE__ void DMA_InjFifoSetClearThresholdCrossed(
                                                    DMA_InjFifoGroup_t *fg_ptr,
                                                    unsigned int        clr
                                                   )
{
  SPI_assert( fg_ptr != NULL );
  SPI_assert( fg_ptr->status_ptr != NULL );
  /* Caller may only touch fifos it has permission to use. */
  SPI_assert( (clr & fg_ptr->permissions) == clr );

  fg_ptr->status_ptr->clear_threshold_crossed = clr;

}


/*!
 * \brief Get the "activated" Status of an Injection Fifo Group
 *
 * Get the "activated" status of the fifos that the specified fifo
 * group has permission to use.
 *
 * \param[in]  fg_ptr  Pointer to the injection fifo group structure
 *
 * \retval  activatedStatus  A 32-bit value, one bit per fifo.
 *                           Bit i is 1 if the specified fifo group has
 *                           permission to use fifo i and fifo i is activated
 *                           Bit i is 0 if the specified fifo group either
 *                           does not have permission to use fifo i, or fifo i
 *                           is not activated.
 *
 */
__INLINE__ unsigned DMA_InjFifoGetActivated(
                                            DMA_InjFifoGroup_t *fg_ptr
                                           )
{
  SPI_assert( fg_ptr != NULL );
  SPI_assert( fg_ptr->status_ptr != NULL );

  return ( fg_ptr->status_ptr->activated & fg_ptr->permissions );

}


/*!
 * \brief Set the "activate" Status of an Injection Fifo Group
 *
 * Set the "activate" status of the fifos that the specified fifo
 * group has permission to use.
 *
 * \param[in]  fg_ptr  Pointer to the injection fifo group structure
 * \param[in]  act     A 32-bit value, one bit per fifo.
 *                     Only bits that the specified fifo group has
 *                     permission to use should be set to 1.
 *                     Set bit i to 1 to activate fifo i.
 *
 * \return  None
 *
 */
__INLINE__ void DMA_InjFifoSetActivate(
                                       DMA_InjFifoGroup_t *fg_ptr,
                                       unsigned int        act
                                      )
{
  SPI_assert( fg_ptr != NULL );
  SPI_assert( fg_ptr->status_ptr != NULL );
  /* Caller may only touch fifos it has permission to use. */
  SPI_assert( (act & fg_ptr->permissions) == act );

  fg_ptr->status_ptr->activate = act;

}


/*!
 * \brief Set the "deactivate" Status of an Injection Fifo Group
 *
 * Set the "deactivate" status of the fifos that the specified fifo
 * group has permission to use.
 *
 * \param[in]  fg_ptr  Pointer to the injection fifo group structure
 * \param[in]  deact   A 32-bit value, one bit per fifo.
 *                     Only bits that the specified fifo group has
 *                     permission to use should be set to 1.
 *                     Set bit i to 1 to deactivate fifo i.
 *
 * \return  None
 *
 */
__INLINE__ void DMA_InjFifoSetDeactivate(
                                         DMA_InjFifoGroup_t *fg_ptr,
                                         unsigned int        deact
                                        )
{
  SPI_assert( fg_ptr != NULL );
  SPI_assert( fg_ptr->status_ptr != NULL );
  /* Caller may only touch fifos it has permission to use. */
  SPI_assert( (deact & fg_ptr->permissions) == deact );

  fg_ptr->status_ptr->deactivate = deact;

}




/*
 * -----------------------------------------------------------------------------
 * Calls to access the Fifo, given a fifo group and a fifo ID
 * -----------------------------------------------------------------------------
 */




/*!
 * \brief DMA InjFifo Initialization By Id
 *
 * - For an allocated injection DMA fifo, initialize its start, head, tail, and
 *   end.
 * - Compute fifo size and free space.
 * - Initialize descriptor count.
 * - Activate the fifo.
 *
 * \param[in]  fg_ptr    Pointer to fifo group structure.
 * \param[in]  fifo_id   Id of the fifo to be initialized
 *                       (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 * \param[in]  va_start  Virtual address of the start of the fifo.
 * \param[in]  va_head   Virtual address of the head of the fifo (typically
 *                       equal to va_start).
 * \param[in]  va_end    Virtual address of the end of the fifo.
 *
 * \retval  0  Successful.
 * \retval  -1 Unsuccessful.  Error checks include
 *             - va_start < va_end
 *             - va_start <= va_head <=
 *               (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
 *             - va_start and va_end are 32-byte aligned
 *             - fifo_size is larger than (DMA_MIN_INJECT_SIZE_IN_QUADS +
 *               DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
 *
 * \note  The listed argument checks are enforced here via SPI_assert; the
 *        -1 return value comes from the Kernel_InjFifoInitById system call
 *        -- TODO confirm its error codes against the kernel interface.
 */
__INLINE__ int  DMA_InjFifoInitById(
                                    DMA_InjFifoGroup_t *fg_ptr,
                                    int                 fifo_id,
                                    void               *va_start,
                                    void               *va_head,
                                    void               *va_end
                                   )
{
  int rc;

  SPI_assert( fg_ptr != NULL );
  SPI_assert( fifo_id >= 0 && fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
  SPI_assert( (fg_ptr->permissions & _BN(fifo_id)) != 0 );
  SPI_assert( va_start < va_end );
  SPI_assert( va_start <= va_head );
  SPI_assert( ((uint32_t) va_head) <= ( ((uint32_t) va_end) - DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES) );
  SPI_assert( ( ( (uint32_t) va_start) & 0xFFFFFFE0) == (uint32_t) va_start );
  SPI_assert( ( ( (uint32_t) va_end  ) & 0xFFFFFFE0) == (uint32_t) va_end );
  SPI_assert( ( (unsigned)va_end - (unsigned)va_start ) >=
              ( (DMA_MIN_INJECT_SIZE_IN_QUADS + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) * 16 ) );

  /*
   * Initialize the fifo by invoking a system call.  This system call
   * deactivates the fifo, initializes the start, end, head, and tail,
   * and activates the fifo.
   */

  rc = Kernel_InjFifoInitById(
                              (uint32_t*)fg_ptr,
                              fifo_id,
                              (uint32_t*)va_start,
                              (uint32_t*)va_head,
                              (uint32_t*) va_end
                             );

  if (rc) return rc;

  /* Initialize the descriptor count */
  fg_ptr->fifos[fifo_id].desc_count = 0;

  return 0;
}


/*!
 * \brief Get DMA InjFifo Start Pointer from the Shadow Using a Fifo Id
 *
 * Get a DMA injection fifo's start pointer, given a fifo group and fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 *
 * \retval  va_start  The virtual address of the start of the fifo
 *
 */
__INLINE__ void * DMA_InjFifoGetStartFromShadowById(
                                                    DMA_InjFifoGroup_t *fg_ptr,
                                                    int                 fifo_id
                                                   )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  return DMA_FifoGetStartFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
}


/*!
 * \brief Get DMA InjFifo Head Pointer Using a Fifo Id
 *
 * Get a DMA injection fifo's head pointer, given a fifo group and fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 *
 * \retval  va_head  The virtual address of the head of the fifo.
 *
 */
__INLINE__ void * DMA_InjFifoGetHeadById(
                                         DMA_InjFifoGroup_t *fg_ptr,
                                         int                 fifo_id
                                        )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  return DMA_FifoGetHead( &fg_ptr->fifos[fifo_id].dma_fifo );
}


/*!
 * \brief Get DMA InjFifo Tail Pointer Using a Fifo Id
 *
 * Get a DMA injection fifo's tail pointer, given a fifo group and fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 *
 * \retval  va_tail  The virtual address of the tail of the fifo
 *
 */
__INLINE__ void *DMA_InjFifoGetTailById(
                                        DMA_InjFifoGroup_t *fg_ptr,
                                        int                 fifo_id
                                       )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  return DMA_FifoGetTail( &fg_ptr->fifos[fifo_id].dma_fifo );
}


/*!
 * \brief Get DMA InjFifo End Pointer from the Shadow Using a Fifo Id
 *
 * Get a DMA injection fifo's end pointer, given a fifo group and fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 *
 * \retval  va_end  The virtual address of the end of the fifo
 *
 * \note  Reads the shadow end value; the name lacks the "FromShadow"
 *        suffix used by DMA_InjFifoGetStartFromShadowById but the
 *        behavior is analogous.
 */
__INLINE__ void *DMA_InjFifoGetEndById(
                                       DMA_InjFifoGroup_t *fg_ptr,
                                       int                 fifo_id
                                      )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  return DMA_FifoGetEndFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
}


/*!
 * \brief Get DMA InjFifo Size Using a Fifo Id
 *
 * Get a DMA injection fifo's size, given a fifo group and fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 *
 * \retval  size  The size of the DMA fifo, in units of 16B quads.
 *
 */
__INLINE__ unsigned int DMA_InjFifoGetSizeById(
                                               DMA_InjFifoGroup_t *fg_ptr,
                                               int                 fifo_id
                                              )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  return DMA_FifoGetSize( &fg_ptr->fifos[fifo_id].dma_fifo );
}


/*!
 * \brief Get DMA InjFifo Free Space Using a Fifo Id
 *
 * Get a DMA injection fifo's free space, given a fifo group and fifo id.
 *
 * \param[in]  fg_ptr     Pointer to the fifo group structure
 * \param[in]  fifo_id    Id of the fifo within the group
 *                        (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 * \param[in]  read_head  Indicates whether to read the head from the hardware
 *                        fifo before calculating the free space.
 *                        - 1 means to read the hardware head
 *                        - 0 means to use the current head shadow
 * \param[in]  read_tail  Indicates whether to read the tail from the hardware
 *                        fifo before calculating the free space.
 *                        - 1 means to read the hardware tail
 *                        - 0 means to use the current tail shadow
 *
 * \retval  freeSpace  The amount of free space in the fifo, in units of
 *                     16B quads.
 *
 */
__INLINE__ unsigned int DMA_InjFifoGetFreeSpaceById(
                                                    DMA_InjFifoGroup_t *fg_ptr,
                                                    int                 fifo_id,
                                                    unsigned int        read_head,
                                                    unsigned int        read_tail
                                                   )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  return DMA_FifoGetFreeSpace( &fg_ptr->fifos[fifo_id].dma_fifo,
                               read_head,
                               read_tail );
}


/*!
 * \brief Set DMA InjFifo Head Pointer Using a Fifo Id
 *
 * Set a DMA injection fifo's head pointer, given a fifo group and fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 * \param[in]  va_head  The virtual address of the head of the fifo.
 *
 * \return  None
 *
 */
__INLINE__ void DMA_InjFifoSetHeadById(
                                       DMA_InjFifoGroup_t *fg_ptr,
                                       int                 fifo_id,
                                       void               *va_head
                                      )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  DMA_InjFifoSetHead( &fg_ptr->fifos[fifo_id],
                      va_head);
}


/*!
 * \brief Set DMA InjFifo Tail Pointer Using a Fifo Id
 *
 * Set a DMA injection fifo's tail pointer, given a fifo group and fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 * \param[in]  va_tail  The virtual address of the tail of the fifo.
 *
 * \return  None
 *
 */
__INLINE__ void DMA_InjFifoSetTailById(
                                       DMA_InjFifoGroup_t *fg_ptr,
                                       int                 fifo_id,
                                       void               *va_tail
                                      )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  DMA_FifoSetTail( &fg_ptr->fifos[fifo_id].dma_fifo,
                   va_tail);
}


/*!
 * \brief Increment DMA InjFifo Tail Pointer Using a Fifo Id
 *
 * Increment a DMA injection fifo's tail pointer, given a fifo group and
 * fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 * \param[in]  incr     The number of quads (16 byte units) to increment the
 *                      tail pointer by.
 *
 * \return  None
 *
 * \note This function does not check if there is free space in the fifo
 *       for this many quads.  It must be preceded by a check of the
 *       free space.
 */
__INLINE__ void DMA_InjFifoIncrementTailById(
                                             DMA_InjFifoGroup_t *fg_ptr,
                                             int                 fifo_id,
                                             unsigned int        incr
                                            )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  DMA_InjFifoIncrementTail( &fg_ptr->fifos[fifo_id],
                            incr);
}


/*!
 * \brief Get DMA InjFifo Descriptor Count Using a Fifo Id
 *
 * Get a DMA injection fifo's descriptor count, given a fifo group and
 * fifo id.
 *
 * \param[in]  fg_ptr   Pointer to the fifo group structure
 * \param[in]  fifo_id  Id of the fifo within the group
 *                      (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 *
 * \retval  desc_count  The descriptor count for the specified fifo
 *
 */
__INLINE__  unsigned long long  DMA_InjFifoGetDescriptorCountById(
                                                   DMA_InjFifoGroup_t *fg_ptr,
                                                   int                 fifo_id
                                                                 )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  return DMA_InjFifoGetDescriptorCount( &fg_ptr->fifos[fifo_id] );
}


/*!
 * \brief Is DMA Descriptor Done Using a Fifo Id
 *
 * Return whether a specified descriptor is still in the specified injection
 * fifo (not done).  The descriptor is identified by the descriptor count
 * immediately after the descriptor was injected into the fifo (returned by
 * DMA_InjFifoIncrementTail()).
 *
 * \param[in]  fg_ptr      Pointer to the fifo group structure
 * \param[in]  fifo_id     Id of the fifo within the group
 *                         (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
 * \param[in]  desc_count  The descriptor count immediately after the
 *                         descriptor in question was injected into
 *                         the fifo.
 * \param[in]  update      0  Do not update the fifo's shadow information.
 *                         1  Update the fifo's shadow information.
 *                         It is a performance optimization to only update the
 *                         shadow information once for a group of descriptors
 *                         being processed.
 *
 * \retval  0  False.  The descriptor identified by desc_count is not done.
 *             It is still in the fifo.
 * \retval  1  True.   The descriptor identified by desc_count is done.
 *             It is no longer in the fifo.
 *
 */
__INLINE__ unsigned int DMA_InjFifoIsDescriptorDoneById(
                                        DMA_InjFifoGroup_t *fg_ptr,
                                        int                 fifo_id,
                                        unsigned long long  desc_count,
                                        unsigned int        update
                                                       )
{
  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );

  return(DMA_InjFifoIsDescriptorDone( &fg_ptr->fifos[fifo_id],
                                      desc_count,
                                      update ) );

}


/*!
+ * \brief DMA Injection Fifo Reserve Descriptor Storage Using a Fifo Id + * + * Reserve storage in a DMA injection fifo for a remote get descriptor, given + * a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \retval 0 Successful. There was enough space in the fifo and the + * storage was reserved. + * \retval -1 Unsuccessful. There was not enough space in the fifo. + * + * \note Internally, this increments the occupiedSize of the fifo by + * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS. + * + */ +__INLINE__ int DMA_InjFifoReserveDescriptorStorageById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return ( DMA_InjFifoReserveDescriptorStorage( &fg_ptr->fifos[fifo_id] ) ); +} + + +/*! + * \brief DMA Injection Fifo Free Descriptor Storage Reservation Using a Fifo Id + * + * Free a reservation for storage for a remote get descriptor in a DMA injection + * fifo, previously reserved using DMA_InjFifoReserveDescriptorStorageById(). + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \return None + * + * \note Internally, this decrements the occupiedSize of the fifo by + * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS. + * + */ +__INLINE__ void DMA_InjFifoFreeDescriptorStorageReservationById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + DMA_InjFifoFreeDescriptorStorageReservation( &fg_ptr->fifos[fifo_id] ); + return; +} + + +/*! 
+ * \brief Check If An Injection Fifo Has Space For Injection Using a Fifo Id + * + * Check if an injection fifo has enough space for a single descriptor to be + * injected, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \retval hasSpace An indicator of whether the fifo has space for a + * descriptor. + * - 0 (false) means the fifo is full. + * - 1 (true) means the fifo has space. + * + */ +__INLINE__ unsigned int DMA_InjFifoHasSpaceById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return DMA_InjFifoHasSpace( &fg_ptr->fifos[fifo_id] ); +} + + +/*! + * \brief Inject a Descriptor into a DMA Injection Fifo Without Checking for + * Space Using a Fifo Id + * + * Inject a descriptor into a DMA injection fifo, given a fifo group and + * fifo id, without checking to see if there is enough space in the fifo. + * It is assumed that the caller has already checked for enough space using + * the DMA_InjFifoHasSpaceById() function. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * \param[in] desc A pointer to the descriptor to be injected. + * Must be 16-byte aligned. + * + * \retval numDescInjected The number of descriptors injected. + * - 1 means it was successfully injected. 
+ * + * \see DMA_InjFifoHasSpaceById() + */ +__INLINE__ int DMA_InjFifoInjectDescriptorNoSpaceCheckById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id, + DMA_InjDescriptor_t *desc + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return DMA_InjFifoInjectDescriptorNoSpaceCheck( &fg_ptr->fifos[fifo_id], + desc ); +} + + +/*! + * \brief Inject Descriptor into a DMA InjFifo Using a Fifo Id + * + * Inject a descriptor into a DMA injection fifo, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * \param[in] desc Pointer to the descriptor to be injected into the fifo. + * + * \retval numDescInjected The number of descriptors injected. + * - 0 means it was not injected, most likely because + * the fifo is full. + * - 1 means it was successfully injected + * + */ +__INLINE__ int DMA_InjFifoInjectDescriptorById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id, + DMA_InjDescriptor_t *desc + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return DMA_InjFifoInjectDescriptor( &fg_ptr->fifos[fifo_id], + desc ); +} + + +/*! + * \brief Inject Multiple Descriptors into a DMA InjFifo Using a Fifo Id + * + * Inject multiple descriptors into a DMA injection fifo, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * \param[in] num_desc Number of descriptors to be injected + * \param[in] desc Pointer to an array of pointers to the descriptors to + * be injected into the fifo. + * + * \retval numDescInjected The number of descriptors injected. 
+ * - less than num_desc means that some were not + * injected, most likely because the fifo is full. + * - equal to num_desc means that all were + * successfully injected. + * + */ +__INLINE__ int DMA_InjFifoInjectDescriptorsById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id, + int num_desc, + DMA_InjDescriptor_t **desc + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return DMA_InjFifoInjectDescriptors ( &fg_ptr->fifos[fifo_id], + num_desc, + desc ); +} + + +/*! + * \brief Get DMA InjFifo Not Empty Status Using a Fifo Id + * + * Get a DMA injection fifo's not empty status, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \return 32-bit status. status bit "fifo_id" is 1 if the + * fifo is not empty, 0 if empty. That is, the return value + * is 0 if empty, non-zero if not empty. + * + */ +__INLINE__ unsigned DMA_InjFifoGetNotEmptyById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return ( DMA_InjFifoGetNotEmpty( fg_ptr ) & _BN(fifo_id) ); +} + + +/*! + * \brief Get DMA InjFifo Available Status Using a Fifo Id + * + * Get a DMA injection fifo's available status, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \return 32-bit status. status bit fifo_id is 1 if the + * fifo is available, 0 if empty. + * + * \note "available" status means the fifo is enabled and activated. 
+ * + */ +__INLINE__ unsigned DMA_InjFifoGetAvailableById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return ( DMA_InjFifoGetAvailable( fg_ptr ) & _BN(fifo_id) ); +} + + +/*! + * \brief Get DMA InjFifo Threshold Crossed Status Using a Fifo Id + * + * Get a DMA injection fifo's threshold crossed status, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \return 32-bit status. status bit fifo_id is 1 if the + * fifo's threshold has been crossed, 0 if not. + * + * \note This will always be zero. + * + */ +__INLINE__ unsigned DMA_InjFifoGetThresholdCrossedById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return ( DMA_InjFifoGetThresholdCrossed( fg_ptr ) & _BN(fifo_id) ); +} + + +/*! + * \brief Clear DMA InjFifo Threshold Crossed Status Using a Fifo Id + * + * Clear a DMA injection fifo's threshold crossed status, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \return None + * + */ +__INLINE__ void DMA_InjFifoSetClearThresholdCrossedById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + DMA_InjFifoSetClearThresholdCrossed( fg_ptr, + _BN(fifo_id) ); +} + + +/*! 
+ * \brief Get DMA InjFifo Activated Status Using a Fifo Id + * + * Get a DMA injection fifo's activated status, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \return 32-bit status. status bit fifo_id is 1 if the + * fifo is activated, 0 if empty. + * + */ +__INLINE__ unsigned DMA_InjFifoGetActivatedById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + return ( DMA_InjFifoGetActivated( fg_ptr ) & _BN(fifo_id) ); +} + + +/*! + * \brief Activate DMA InjFifo Using a Fifo Id + * + * Activate a DMA injection fifo, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * + * \return None + * + */ +__INLINE__ void DMA_InjFifoSetActivateById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + DMA_InjFifoSetActivate( fg_ptr, + _BN(fifo_id) ); +} + + +/*! + * \brief Deactivate DMA InjFifo Using a Fifo Id + * + * Deactivate a DMA injection fifo, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). 
+ * + * \return None + * + */ +__INLINE__ void DMA_InjFifoSetDeactivateById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 ); + + DMA_InjFifoSetDeactivate( fg_ptr, + _BN(fifo_id) ); +} + + +__END_DECLS + + +#endif diff --git a/arch/powerpc/include/spi/DMA_Packet.h b/arch/powerpc/include/spi/DMA_Packet.h new file mode 100644 index 00000000000000..2fea7c98f6c437 --- /dev/null +++ b/arch/powerpc/include/spi/DMA_Packet.h @@ -0,0 +1,347 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ + +#ifndef _DMA_PACKET_H_ /* Prevent multiple inclusion */ +#define _DMA_PACKET_H_ + + +/*! + * \file spi/DMA_Packet.h + * + * \brief DMA SPI Packet Definitions + * + */ + + +#include <common/namespace.h> + + + +__BEGIN_DECLS + + +/*! + * \brief Hint Bit: Packet wants to travel in the X plus direction. + */ +#define DMA_PACKET_HINT_XP (0x20) + + +/*! + * \brief Hint Bit: Packet wants to travel in the X minus direction. + */ +#define DMA_PACKET_HINT_XM (0x10) + + +/*! + * \brief Hint Bit: Packet wants to travel in the Y plus direction. 
+ */ +#define DMA_PACKET_HINT_YP (0x08) + + +/*! + * \brief Hint Bit: Packet wants to travel in the Y minus direction. + */ +#define DMA_PACKET_HINT_YM (0x04) + + +/*! + * \brief Hint Bit: Packet wants to travel in the Z plus direction. + */ +#define DMA_PACKET_HINT_ZP (0x02) + + +/*! + * \brief Hint Bit: Packet wants to travel in the Z minus direction. + */ +#define DMA_PACKET_HINT_ZM (0x01) + + +/*! + * \brief Virtual Channel Bits: Dynamic 0. + */ +#define DMA_PACKET_VC_D0 (0) + + +/*! + * \brief Virtual Channel Bits: Dynamic 1. + */ +#define DMA_PACKET_VC_D1 (1) + + +/*! + * \brief Virtual Channel Bits: Deterministic Bubble Normal. + */ +#define DMA_PACKET_VC_BN (2) + + +/*! + * \brief Virtual Channel Bits: Deterministic Bubble Priority. + */ +#define DMA_PACKET_VC_BP (3) + + +/*! + * \brief Dynamic Routing Bit: Follows deterministic Routing. + */ +#define DMA_PACKET_DETERMINSTIC (0) + + +/*! + * \brief Dynamic Routing Bit: Follows dynamic Routing. + */ +#define DMA_PACKET_DYNAMIC (1) + + +/*! + * \brief Torus Hardware Packet Header Constants for Routing: Point to Point. + */ +#define DMA_PACKET_POINT2POINT (0) + + +/*! + * \brief Torus Hardware Packet Header Constants for Routing: Class. + */ +#define DMA_PACKET_CLASS (1) + + +/*! + * \brief Torus DMA Hardware Packet Header. + * + * There are two sections of the packet header: The hardware header + * and the software header. + * + * The same 8-byte hardware header as was used on Blue Gene/L is used + * for Blue Gene/P, except that two bits that were previously unused + * will be used as follows: + * + * - The Pid bit on Blue Gene/L indicates the logical destination group. + * This determines the reception fifo group a packet ends up in. + * This bit is now called Pid0. The new Pid1 bit expands the logical + * destination group from two to four. This corresponds to the increase + * in cores from two to four. + * + * - The new Dm bit indicates the DMA mode: Memory fifo or direct. 
+ * In memory fifo mode, the DMA receives packets from the torus fifos into + * reception fifos located in memory. Then the core copies the payload + * from the memory fifo to its final destination. In direct mode, the DMA + * moves the packet payload directly from the torus fifos to its final + * destination. + * + * The 8-byte software header was used by the software on Blue Gene/L for + * its own purposes. On Blue Gene/P, parts of it are used by the DMA, + * depending on the type of DMA transfer being used. The usage of the fields + * in the software header is as follows for the typical types of DMA transfers: + * + * - In memory fifo mode, + * - The first 4 bytes of the software header contain the "put offset". + * This is the offset from the injection counter's base address, in bytes, + * of the memory being transferred in this packet. + * - The last 4 bytes of the software header is for use by software. + * + * - In direct put mode, + * - The first 4 bytes of the software header contain the "put offset". + * This is the offset from the reception counter's base address, in bytes, + * of the memory where the payload in this packet is placed. + * - The fifth byte of the software header is the reception counter Id. + * - The sixth byte of the software header is the number of valid bytes of + * payload in this packet. + * - The seventh byte of the software header contains DMA flags. Specifically, + * the remote-get flag is 0. + * - The last byte of the software header is for use by software. + * + * - In remote get mode, the payload contains one or more injection descriptors + * describing data to be transferred by the DMA. When the DMA receives this + * packet, it injects the descriptors into injection fifos to perform the + * specified data transfer. + * - The first 5 bytes of the software header are for use by software. + * - The sixth byte of the software header is the number of valid bytes of + * payload in this packet. 
This will be a multiple of 32, since the payload + * consists of one or more 32 byte DMA descriptors. + * - The seventh byte of the software header contains DMA flags. Specifically, + * the remote-get flag is 1. + * - The eighth byte of the software header is the injection fifo Id where + * the descriptors in the payload will be injected. + * + */ +typedef struct DMA_PacketHeader_t +{ + union { + unsigned word0; /*!< First 4 bytes of packet header. */ + + struct { + unsigned CSum_Skip : 7; /*!< Number of 2 byte units to skip from + the top of a packet before including + the packet bytes into the running + checksum of the torus injection fifo + where this packet is injected. + */ + + unsigned Sk : 1; /*!< Torus injection checksum skip packet + bit. + - 0 includes the packet (excluding the + portion designated by DMA_CSUM_SKIP) + in the checksum. + - 1 excludes the entire packet from + the checksum. + */ + + unsigned Hint : 6; /*!< Hint bits for torus routing (6 bits). + Each bit corresponds to x+, x-, y+, y-, + z+, z-. If a bit is set, it indicates + that the packet wants to travel along + the corresponding direction. If all + bits are zero, the hardware calculates + the hint bits. Both x+ and x- cannot + be set at the same time...same with y + and z. + */ + + unsigned Dp : 1; /*!< Deposit Bit for Class Routed MultiCast. + If this bit is set to 1, then as the + packet travels along a straight line + to its destination, it also deposits + a copy of itself into each node as it + goes through. This feature must be + used only if the packet is set to + travel along a straight line. + */ + + unsigned Pid0 : 1; /*!< Destination Fifo Group Most Significant + Bit. (Pid0,Pid1) specifies which of 4 + reception fifo groups that this packet + is destined for. + */ + + unsigned Chunks : 3; /*!< Size in Chunks of 32B (0 for 1 chunk, + ... , 7 for 8 chunks). + */ + + unsigned Pid1 : 1; /*!< Destination Fifo Group Least + significant bit. Refer to Pid0. 
+ */ + + unsigned Dm : 1; /*!< 1=DMA Mode, 0=Fifo Mode. */ + + unsigned Dynamic : 1; /*!< 1=Dynamic Routing, + 0=Deterministic Routing. + */ + + unsigned VC : 2; /*!< Virtual Channel + - 0=Dynamic 0 + - 1=Dynamic 1 + - 2=Deterministic Bubble Normal + - 3=Deterministic Bubble Priority + */ + + unsigned X : 8; /*!< Destination X Physical Coordinate. */ + + }; /* End of individual fields in Word 0 */ + + }; /* End of Word 0 */ + + + union { + + unsigned word1; /*!< Second 4 bytes of packet header. */ + + struct { + unsigned Y : 8; /*!< Destination Y Physical Coordinate. */ + + unsigned Z : 8; /*!< Destination Z Physical Coordinate. */ + + unsigned Resvd0 : 8; /*!< Reserved (pkt crc). */ + + unsigned Resvd1 : 8; /*!< Reserved (pkt crc). */ + + }; /* End of individual fields in Word 1 */ + + }; /* End of Word 1 */ + + + union { + + unsigned word2; /*!< Third 4 bytes of packet header. */ + + unsigned Put_Offset; /*!< For a memory fifo packet, gives a + unique ID to each packet in a long + message. Derived from the put offset + of the torus packet header in the + descriptor, and updated for each + packet. + For a direct-put packet, the rDMA + writes the first payload byte to this + offset plus the base address + corresponding to the rDMA counter ID. + */ + + unsigned Single_Packet_Parameter; /*!< For a single memory fifo packet, + this is essentially unused space + that can be used to pass a + parameter to the target node. + */ + }; /* End of Word 2 */ + + + union { + + unsigned word3; /*!< Fourth 4 bytes of packet header. */ + + struct { + unsigned rDMA_Counter : 8; /*!< For a direct-put packet, this is the + number of the rDMA counter associated + with this packet. + */ + + unsigned Payload_Bytes : 8; /*!< For a direct-put packet, this is the + number of valid bytes in the payload. + This is set by the iDMA, based on the + message length in the injection + descriptor. 
+ */ + + unsigned Flags : 8; /*!< Flags[6]=Pacing, Flags[7]=Remote-Get.*/ + + unsigned iDMA_Fifo_ID : 8; /*!< For a remote-get packet, this is the + iDMA fifo ID to be injected during + remote-get processing. + */ + }; + + struct { /* For memory fifo packets... */ + + unsigned SW_Arg : 24; /*!< User-defined. */ + + unsigned Func_Id : 8 ; /*!< Function ID for dispatching receiver + functions from Polling reception + fifos. + */ + }; + + }; /* End of Word 3 */ + +} +ALIGN_QUADWORD DMA_PacketHeader_t; + + + + +__END_DECLS + + +#endif diff --git a/arch/powerpc/include/spi/DMA_RecFifo.h b/arch/powerpc/include/spi/DMA_RecFifo.h new file mode 100644 index 00000000000000..20c8b34538d9ff --- /dev/null +++ b/arch/powerpc/include/spi/DMA_RecFifo.h @@ -0,0 +1,1810 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ + + +#ifndef _DMA_RECFIFO_H_ /* Prevent multiple inclusion */ +#define _DMA_RECFIFO_H_ + + +/*! 
+ * \file spi/DMA_RecFifo.h + * + * \brief DMA SPI Reception Fifo Definitions and Inline Functions + * + * There are + * - 6 normal-priority torus hardware fifos + * - 1 high-priority torus hardware fifo + * - 1 local memcpy reception fifo + * If we assigned a reception fifo to each, there would be 8. These are called + * "normal reception fifos". + * + * There is one reception fifo used to store packet headers only (for debug). + * This is called the "header reception fifo". + * + * The hardware packet header's (Pid0, Pid1) bits specify up to four processors. + * There is one set of "normal" and one "header" reception fifos per processor, + * called a "reception fifo group". Thus, there are 4 groups. + * + */ + + +#include <common/namespace.h> +/* #include <common/bgp_ras.h> */ + + +__BEGIN_DECLS + + +/*! + * \brief __INLINE__ definition + * + * Option 1: + * Make all functions be "static inline": + * - They are inlined if the compiler can do it + * - If the compiler does not inline it, a single copy of the function is + * placed in the translation unit (eg. xxx.c)for use within that unit. + * The function is not externalized for use by another unit...we want this + * so we don't end up with multiple units exporting the same function, + * which would result in linker errors. + * + * Option 2: + * A GNU C model: Use "extern inline" in a common header (this one) and provide + * a definition in a .c file somewhere, perhaps using macros to ensure that the + * same code is used in each case. For instance, in the header file: + * + \verbatim + #ifndef INLINE + # define INLINE extern inline + #endif + INLINE int max(int a, int b) { + return a > b ? a : b; + } + \endverbatim + * + * ...and in exactly one source file (in runtime/SPI), that is included in a + * library... 
+ * + \verbatim + #define INLINE + #include "header.h" + \endverbatim + * + * This allows inlining, where possible, but when not possible, only one + * instance of the function is in storage (in the library). + */ +#ifndef __INLINE__ +#define __INLINE__ extern inline +#endif + + + + +#include <spi/DMA_Fifo.h> +#include <spi/DMA_Assert.h> +#include <spi/DMA_Packet.h> + + + + +/*! + * \brief Number of Normal (non-header) Reception Fifos Per Group + * + * These will have fifo IDs 0 through 7 in a group. + * + */ +#define DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP 8 + + +/*! + * \brief Number of Header Reception Fifos Per Group + * + */ +#define DMA_NUM_HEADER_REC_FIFOS_PER_GROUP 1 + + +/*! + * \brief Fifo ID of the Header Reception Fifo in a group. + * + * This will be fifo ID 8 in a group. + */ +#define DMA_HEADER_REC_FIFO_ID (DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP) + + +/*! + * \brief Number of Reception Fifos Per Group + * + * 8 Normal + 1 Header + */ +#define DMA_NUM_REC_FIFOS_PER_GROUP (DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP + \ + DMA_NUM_HEADER_REC_FIFOS_PER_GROUP) + + +/*! + * \brief Number of Reception Fifo Groups + * + * One group for each processor, identified by (Pid0,Pid1) in the packet header. + */ +#define DMA_NUM_REC_FIFO_GROUPS 4 + + +/*! + * \brief Total Number of Normal Reception Fifos + */ +#define DMA_NUM_NORMAL_REC_FIFOS (DMA_NUM_REC_FIFO_GROUPS * \ + DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP) + +/*! + * \brief Total Number of Header Reception Fifos + */ +#define DMA_NUM_HEADER_REC_FIFOS (DMA_NUM_REC_FIFO_GROUPS * \ + DMA_NUM_HEADER_REC_FIFOS_PER_GROUP) + + +/*! + * \brief The maximum number of packets that can be processed by a polling + * function before it must update the fifo's hardware head. The + * polling function can keep track of the head in the va_head shadow + * and only update the hardware head when this limit is reached to + * reduce overhead. + */ +#define DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD 32 + + +/*! 
+ * \brief Minimum Reception Fifo Size in bytes + * + * It is important that this size be enough to hold more packets than + * DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD. Otherwise, the polling + * function may deadlock with the DMA (the DMA considers the fifo full, + * but we have actually processed all of the packets). + * Additionally, we add 512 bytes to this, since the DMA will only fill + * the fifo to fifoSize - 512 bytes. + * + * + */ +#define DMA_MIN_REC_FIFO_SIZE_IN_BYTES (512 + (256 * DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD)) + +/*! + * \brief DMA Reception Fifo Map Structure + * + * This structure defines the basic reception fifo configuration. + * It is common across all reception fifo groups. + * + * Example 1: + * In SMP mode you might have only two reception fifos: + * - One for normal-priority torus and local transfers, and + * - one for high-priority torus transfers. + * In this case, only one fifo group would be needed. + * + * Example 2: + * In virtual node mode, you might have two reception fifos per group (as + * described in Example 1), and 4 groups, one for each processor. + * All packets with the same (pid0,pid1) bits use the same group. +*/ +typedef struct DMA_RecFifoMap_t +{ + unsigned short int save_headers; /*!< Flag that specifies whether header + will be used to store packet headers. + - 0 => Do not store headers + - 1 => Store headers (debug mode) */ + + unsigned int fifo_types[ DMA_NUM_NORMAL_REC_FIFOS ]; /*!< The type of each + normal rec fifo. If entry i is + - 0, rec fifo i is type 0 + - 1, rec fifo i is type 1 + + For type i fifo, threshold interrupt + fires if fifo free space <= + threshold[i], in units of 16B quads. */ + + unsigned int hdr_fifo_types[ DMA_NUM_HEADER_REC_FIFOS ]; /*!< The type of + each header reception fifo. If entry + i is + - 0, header rec fifo i is type 0 + - 1, header rec fifo i is type 1 + + For type i fifo, threshold interrupt + fires if fifo free space <= + threshold[i], in units of 16B quads. 
*/ + + unsigned int threshold[2]; /*!< For type i fifo, threshold interrupt + fires if fifo free space <= + threshold[i], in units of 16B quads. */ + + unsigned char ts_rec_map[4][8]; /*!< Torus Reception Map. + This array contains the rec fifo ID into which torus + packets are deposited that originate from + - each hardware torus fifo (x+,x-,y+,y-,z+,z-) (0 through 5) + - high-priority hardware torus fifo (6) + - a local transfer (7) + + for each group (0 through 3). + + For ts_rec_map[i][j], + i is the rec fifo group ID, as defined by (pid0,pid1) pair. + j identifies the hardware torus fifo (0-5 for + x+,x-,y+,y-,z+,z- respectively), high-priority + torus fifo (6), and local transfer (7). + The value in each array element must be a global fifo ID + (between 0 and DMA_NUM_NORMAL_REC_FIFOS-1). That is, the value + identifies the normal rec fifo that will receive packets + originating at i,j. + Note that the global fifo ID must be 0-7 for group 0, + 8-15 for group 1, 16-23 for group 2, and 24-31 for group 3. 
+ + This affects DCRS 0xd60 to 0xd67 as defined by the following + - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_XY (_BGP_DCR_DMA+0x60) + - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_ZHL (_BGP_DCR_DMA+0x61) + - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_XY (_BGP_DCR_DMA+0x62) + - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_ZHL (_BGP_DCR_DMA+0x63) + - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_XY (_BGP_DCR_DMA+0x64) + - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_ZHL (_BGP_DCR_DMA+0x65) + - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_XY (_BGP_DCR_DMA+0x66) + - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_ZHL (_BGP_DCR_DMA+0x67) + + e.g., for (pid0,pid1) = (0,1) + - ts_rec_map[1][0] = fifo id for torus x+ receiver packets + - ts_rec_map[1][1] = fifo id for torus x- receiver packets + - ts_rec_map[1][2] = fifo id for torus y+ receiver packets + - ts_rec_map[1][3] = fifo id for torus y- receiver packets + - ts_rec_map[1][4] = fifo id for torus z+ receiver packets + - ts_rec_map[1][5] = fifo id for torus z- receiver packets + - ts_rec_map[1][6] = fifo id for torus high priority packets + - ts_rec_map[1][7] = fifo id for local transfer packets */ + +} DMA_RecFifoMap_t; + + +/*! + * \brief DMA Reception Fifo Status Structure + * + * Defines the DMA SRAM Status Area for Reception Fifos + * + */ +typedef struct DMA_RecFifoStatus_t +{ + volatile unsigned not_empty[2]; /*!< R bit mask, 1 bit/FIFO: + Reception FIFO not empty status. + Not_empty[0] contains 1 bit for each + of the 32 normal fifos. + Each bit corresponds to a + global fifo ID. + Not_empty[1] : + - bit 7 for group 0 header fifo, + - bit 15 for group 1 header fifo, + - bit 23 for group 2 header fifo, + - bit 31 for group 3 header fifo. */ + + volatile unsigned available[2]; /*!< R bitmask, 1 bit/FIFO: + Reception FIFO available status. + Bits are as above for available[0] + and available[1]. */ + + volatile unsigned threshold_crossed[2]; /*!< R bitmask, 1 bit/FIFO: + Threshold crossed status. 
+ Bits are as above for + threshhold_crossed[0] and + threshhold_crossed[1]. */ + + volatile unsigned clear_threshold_crossed[2]; /*!< W bitmask, 1 bit/FIFO: + Clear Threshold Crossed Status. + Bits are as above for + clear_threshold_crossed[0] and + clear_threshold_crossed[1]. */ +} +DMA_RecFifoStatus_t; + + +/*! + * \brief Returns the word number that the specified reception fifo is in + * + * \param[in] global_rec_fifo_id The global ID of the reception fifo + * (0 to DMA_NUM_REC_FIFOS-1). + * + * \return The number of the word that the specified fifo is in (0 or 1). + * + * Used as an index in the "not_empty", "available", "threshold_crossed", and + * "clear_threshold_crossed" fields of the DMA_RecFifoStatus_t structure. + * + */ +#define DMA_REC_FIFO_GROUP_WORD_ID(global_rec_fifo_id) \ + ( ((global_rec_fifo_id)>>5) & _BN(31) ) + + +/*! + * \brief Reception DMA Fifo Structure + * + * This structure contains a software DMA fifo structure (defined in DMA_Fifo.h) + * and other fields that are specific to a reception fifo used by software. + */ +typedef struct DMA_RecFifo_t +{ + DMA_Fifo_t dma_fifo; /*!< Common software fifo structure */ + + unsigned char global_fifo_id; /*!< Global fifo ID: + - 0 to DMA_NUM_NORMAL_REC_FIFOS-1 + for normal fifos, + - 32-35 for header fifos. */ + /*! + * \note The following field contains info about the fifo that reflects the + * DCR values configuring the fifo. + */ + + unsigned char type; /*!< 0 or 1 for type of fifo. */ + + /*! + * \note The following field is used by the reception fifo polling functions. + * It counts the number of packets processed since the fifo's hardware + * head was last updated. When DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD + * is reached, the hardware head is moved and this counter is reset + * to zero. This helps to minimize the number of times the hardware + * head is updated, which can be an expensive operation. 
+ */ + unsigned int num_packets_processed_since_moving_fifo_head; /*!< Tracks when + to move the + fifo head. */ +} +DMA_RecFifo_t; + + +/*! + * \brief DMA Reception Fifo Group Structure + * + * This structure defines a DMA Reception Fifo Group. It points to a + * Reception Fifo Status structure, and contains DMA_NUM_REC_FIFOS_PER_GROUP + * Reception Fifo structures. + * + * It is returned from the DMA_RecFifoGetFifoGroup system call wrapper function + * defined in this header file. This same structure must be used by all users + * of reception fifos in this group because the fifos will contain packets + * destined for these different users, and this structure contains shadows of + * the hardware fifos in the DMA SRAM that must be maintained as the fifos + * change. This common structure is located in static storage + * declared in DMA_RecFifo.c. + * + */ +typedef struct DMA_RecFifoGroup_t +{ + int group_id; /*!< Group ID (0 through DMA_NUM_REC_FIFO_GROUPS-1) */ + + int num_normal_fifos; /*!< Number of normal fifos used in this group + (0 through DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP) */ + + int num_hdr_fifos; /*!< Number of header fifos used in this group + - 0 - headers not being saved, + - 1 - headers being saved. */ + + unsigned mask; /*!< All reads to the status for this group are + masked by this, so you only see results for + this group. + - Group 0: 0xFF000000 + - Group 1: 0x00FF0000 + - Group 2: 0x0000FF00 + - Group 3: 0x000000FF */ + + void *unused1; /*!< Unused space */ + + DMA_RecFifoStatus_t *status_ptr; /*!< Pointer to the status, in DMA SRAM. */ + + DMA_RecFifo_t fifos[DMA_NUM_REC_FIFOS_PER_GROUP]; /*!< Rec Fifos. + Indexes 0 through DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1 are + the normal fifos in the group. + Index DMA_HEADER_REC_FIFO_ID is the header fifo in the + group. + Note: fifos[0] may not be local fifo number 0 in the group.*/ +} +ALIGN_L1D_CACHE DMA_RecFifoGroup_t; + + +/*! 
+ * \brief DMA Reception Fifo Receive Function Prototype + * + * Receive functions must be registered through the + * DMA_RecFifoRegisterRecvFunction interface, which assigns a registration ID + * to the function. When the polling functions process a packet in a + * reception fifo, the appropriate receive function for that packet is + * called with a pointer to the packet header, pointer to the payload, and + * length of the payload. The packet header is always 16 bytes of + * contiguous storage, in the fifo. Because the fifo is a circular buffer, + * the payload of a packet may wrap from the end of the fifo to the beginning. + * For large fifos, this happens infrequently. To make it easier for + * user/messaging code, the poll function will always return a starting payload + * address and number of bytes so that the receive function can treat the packet + * as contiguous storage in memory. If the packet does not wrap, the starting + * payload address will be a pointer to the appropriate address in the fifo. + * If the packet does wrap, the poll function will copy bytes from the fifo to + * a contiguous buffer (on the stack) and call the receive function with a + * payload pointer pointing to this temporary buffer. In either case, when the + * receive function returns, user code cannot assume that the payload buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function would + * have to copy it to some other location. + * + * \param[in] f_ptr Pointer to the reception fifo. + * \param[in] packet_ptr Pointer to the packet header (== va_head). + * This is quad-aligned for optimized copying. + * \param[in] recv_func_parm Pointer to storage specific to this receive + * function. This pointer was specified when the + * receive function was registered with the kernel, + * and is passed to the receive function + * unchanged. 
+ * \param[in] payload_ptr Pointer to the beginning of the payload.
+ * This is quad-aligned for optimized copying.
+ * \param[in] payload_bytes Number of bytes in the payload. Note that this
+ * may be larger than the number of valid bytes
+ * in the payload because it is rounded such that
+ * it (payload_bytes) + 16 (size of packet header)
+ * is a multiple of 32 bytes. Thus, if the size
+ * of the message is 64, payload_bytes is 80 such
+ * that the total packet size is 96, a multiple of
+ * 32.
+ *
+ * \retval 0 No errors found while processing the payload.
+ * \retval negative_number Errors found while processing the payload.
+ */
+typedef int (*DMA_RecFifoRecvFunction_t)(
+ DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ );
+
+
+/*!
+ * \brief DMA Reception Fifo Default Error Receive Function
+ *
+ * This is the default function that will handle packets having an
+ * unregistered registration ID.
+ *
+ * Receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface, which assigns a registration ID
+ * to the function. When the polling functions process a packet in a
+ * reception fifo that has a registration ID that does not have a corresponding
+ * receive function registered, this error receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. 
If the packet does not wrap, the starting + * payload address will be a pointer to the appropriate address in the fifo. + * If the packet does wrap, the poll function will copy bytes from the fifo to + * a contiguous buffer (on the stack) and call the receive function with a + * payload pointer pointing to this temporary buffer. In either case, when the + * receive function returns, user code cannot assume that the payload buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function would + * have to copy it to some other location. + * + * \param[in] f_ptr Pointer to the reception fifo. + * \param[in] packet_ptr Pointer to the packet header (== va_head). + * This is quad-aligned for optimized copying. + * \param[in] recv_func_parm Pointer to storage specific to this receive + * function. This pointer was specified when the + * receive function was registered with the kernel, + * and is passed to the receive function + * unchanged. + * \param[in] payload_ptr Pointer to the beginning of the payload. + * This is quad-aligned for optimized copying. + * \param[in] payload_bytes Number of bytes in the payload + * + * \retval -1 An unregistered packet was just processed. This is considered + * an error. + */ +int DMA_RecFifoDefaultErrorRecvFunction( + DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ); + + +/*! + * \brief Set DMA Reception Fifo Map + * + * This function is a wrapper around a system call that + * - Sets DCRs establishing the map between the hardware torus fifos and the + * DMA reception fifos that are to receive the packets from those hardware + * torus fifos. + * - Sets DCRs establishing the DMA reception fifos that are to receive + * local transfer packets. + * - Sets the DCRs establishing the type (0 or 1) of each reception fifo. 
+ * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos. + * - Leaves all of the fifos that are used in a "disabled" state. + * DMA_RecFifoInitById() initializes and enables the fifos. + * + * \param[in] rec_map Reception Fifo Map structure, defining the mapping. + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h. + * _bgp_err_dma_rfifo_map_twice means the mapping has + * already been set. + * + * \note This function should be called once per job, after DMA_ResetRelease(). + * It may be called by any core, but once a core has called it, other + * calls by that same core or any other core will fail. + * + * \note During job init, the kernel sets up the DCR clear masks for each + * reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear + * a fifo in group g only clears group g. + * + */ +__INLINE__ int DMA_RecFifoSetMap( + DMA_RecFifoMap_t * rec_map + ) +{ + int rc; + rc = Kernel_RecFifoSetMap((uint32_t*)rec_map); + return rc; +} + + +/*! + * \brief Get DMA Reception Fifo Map + * + * This function is a wrapper around a system call that returns a DMA + * reception fifo map structure, filled in according to the DCRs. + * + * \param[in,out] rec_map A pointer to a Reception Fifo Map structure + * that will be filled-in upon return. + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + */ +__INLINE__ int DMA_RecFifoGetMap( + DMA_RecFifoMap_t *rec_map + ) +{ + int rc; + rc = Kernel_RecFifoGetMap((uint32_t*)rec_map); + return rc; +} + + +/*! + * \brief Get DMA Reception Fifo Group + * + * This is a wrapper around a System Call. This function returns THE + * one-and-only pointer to the fifo group structure, with the entries all + * filled in from info in the DCRs. 
If called multiple times with the same + * group, it will always return the same pointer, and the system call will + * not be invoked again. + * + * It must be called AFTER DMA_RecFifoSetMap(). + * + * By convention, the same "target" is used for normal and header fifo + * interrupts (could be changed). In addition, by convention, interrupts for + * fifos in group g come out of the DMA as non-fatal irq bit 28+g, + * ie, only fifos in group g can cause the "type g" threshold interrupts. + * + * \param[in] grp The group number (0 through DMA_NUM_REC_FIFO_GROUPS). + * \param[in] target The core that will receive the interrupt when a + * fifo in this group reaches its threshold + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * Ignored on subsequent call with the same group. + * \param[in] normal_handler A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * normal fifo in this group reaches its threshold. + * This function must be coded to take 4 uint32_t + * parameters: + * - A pointer to storage specific to this + * handler. This is the normal_handler_parm + * specified on this function call. + * - 3 uint32_t parameters that are not used. + * If normal_handler is NULL, threshold interrupts + * are not delivered for normal fifos in this group. + * Ignored on subsequent call with the same group. + * \param[in] normal_handler_parm A pointer to storage that should be passed + * to the normal interrupt handling function + * (see normal_handler parameter). + * Ignored on subsequent call with the same + * group. + * \param[in] header_handler ** This parameter is deprecated. Specify NULL.** + * A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * header fifo in this group reaches its threshold. + * This function must be coded to take 2 parameters: + * void* A pointer to storage specific to this + * handler. This is the header_handler_parm + * specified on this function call. 
+ * int The global fifo ID of the fifo that hit + * its threshold (0 through + * NUM_DMA_REC_FIFOS-1). + * If header_handler is NULL, threshold interrupts + * are not delivered for header fifos in this group. + * Ignored on subsequent call with the same group. + * \param[in] header_handler_parm ** This parameter is deprecated. Specify + * NULL. ** + * A pointer to storage that should be passed + * to the header interrupt handling function + * (see header_handler parameter). + * Ignored on subsequent call with the same + * group. + * \param[in] interruptGroup A InterruptGroup_t that identifies the + * group of interrupts that the fifos in this group + * will become part of. + * Ignored on subsequent call with the same group. + * + * \return RecFifoGroupStruct Pointer to a DMA Reception Fifo Group structure + * that reflects the fifos that are being used in + * this group. This same structure is shared by + * all users of this reception fifo group. + * NULL is returned if an error occurs. + */ +DMA_RecFifoGroup_t * +DMA_RecFifoGetFifoGroup( + int grp, + int target, + Kernel_CommThreadHandler normal_handler, + void *normal_handler_parm, + Kernel_CommThreadHandler header_handler, + void *header_handler_parm, + Kernel_InterruptGroup_t interruptGroup + ); + + + + +/* + * ----------------------------------------------------------------------------- + * Calls to access the Fifo, given a reception fifo structure + * ----------------------------------------------------------------------------- + */ + + + + +/*! + * \brief Increment DMA Reception Fifo Head + * + * Increment a DMA reception fifo's "head", given a reception fifo structure + * + * \param[in] f_ptr Pointer to the reception fifo structure + * \param[in] incr The number of quads (16 byte units) to increment the + * head pointer by. + * + * \return None + * + * \post va_head is set in both the hardware and software fifo structures, + * and the fifo free space is recalculated. 
+ * + */ +__INLINE__ void DMA_RecFifoIncrementHead( + DMA_RecFifo_t *f_ptr, + unsigned int incr + ) +{ + SPI_assert( f_ptr != NULL ); + + { + void *va_head = DMA_FifoGetHeadNoFreeSpaceUpdate( &f_ptr->dma_fifo ); + + void *va_end = DMA_FifoGetEndFromShadow( &f_ptr->dma_fifo ); + + unsigned int incr_bytes = incr << 4; + + unsigned int bytes_to_end = (unsigned)va_end - (unsigned)va_head; + + /* + * Note: The following check must be >= instead of just >. We never want + * the head to be equal to the end so we can always copy a quad + * from the head, safely. + */ + if ( incr_bytes >= bytes_to_end ) + { + va_head = (char *) + ( (unsigned)DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo ) + + ( incr_bytes - bytes_to_end ) ); + } + else + { + va_head = (char *)( (unsigned)va_head + incr_bytes ); + } + + /* Set the head and update the fifo's free space */ + DMA_FifoSetHead( &f_ptr->dma_fifo, + va_head ); + } +} + + +/*! + * \brief Get the "Not Empty" Status of a Reception Fifo Group + * + * Get the "Not Empty" status of the reception fifos that are being used in the + * specified "not empty" word. + * + * \param[in] fg_ptr Pointer to the reception fifo group structure + * \param[in] word The word (0 or 1) of the "not empty" status to be + * returned. + * + * \retval notEmptyStatus A 32-bit value: + * If "word" is 0, bit i is 1 if normal rec fifo i is + * in use and is not empty. + * If "word" is 1, bits 7, 15, 23, 31 are 1 if header + * rec fifos for groups 1, 2, 3, 4 respectively are in + * use and not empty. 
+ * + */ +__INLINE__ unsigned DMA_RecFifoGetNotEmpty( + DMA_RecFifoGroup_t *fg_ptr, + int word + ) +{ + SPI_assert( fg_ptr != NULL ); + SPI_assert( fg_ptr->status_ptr != NULL ); + SPI_assert( (word == 0) || (word == 1) ); + + /* printf("RecFifoGetNotEmpty: group=%d, status addr=0x%08x, not_empty=0x%08x, mask=0x%08x, RecFifoHwAddr=0x%08x, RecFifo4PaTail=0x%08x, PaHead=0x%08x\n", */ + /* fg_ptr->group_id, (unsigned)(&(fg_ptr->status_ptr->not_empty[word])), */ + /* fg_ptr->status_ptr->not_empty[word], fg_ptr->mask, */ + /* (unsigned)(fg_ptr->fifos[4].dma_fifo.fifo_hw_ptr), */ + /* fg_ptr->fifos[4].dma_fifo.fifo_hw_ptr->pa_tail, */ + /* fg_ptr->fifos[4].dma_fifo.fifo_hw_ptr->pa_head); */ + + return ( fg_ptr->status_ptr->not_empty[word] & fg_ptr->mask ); + +} + + +/*! + * \brief Get the "Available" Status of a Reception Fifo Group + * + * Get the "available" status of the reception fifos that are being used in the + * specified "available" word. + * + * \param[in] fg_ptr Pointer to the reception fifo group structure + * \param[in] word The word (0 or 1) of the "available" status to be + * returned. + * + * \retval availableStatus A 32-bit value: + * If "word" is 0, bit i is 1 if normal rec fifo i is + * in use and is available. + * If "word" is 1, bits 7, 15, 23, 31 are 1 if header + * rec fifos for groups 1, 2, 3, 4 respectively are in + * use and available. + * + */ +__INLINE__ unsigned DMA_RecFifoGetAvailable( + DMA_RecFifoGroup_t *fg_ptr, + int word + ) +{ + SPI_assert( fg_ptr != NULL ); + SPI_assert( fg_ptr->status_ptr != NULL ); + SPI_assert( (word == 0) || (word == 1) ); + + return ( fg_ptr->status_ptr->available[word] & fg_ptr->mask ); +} + + +/*! + * \brief Get the "Threshold Crossed" Status of a Reception Fifo Group + * + * Get the "threshold crossed" status of the reception fifos that are being + * used in the specified "threshold crossed" word. 
+ *
+ * \param[in] fg_ptr Pointer to the reception fifo group structure
+ * \param[in] word The word (0 or 1) of the "threshold crossed" status
+ * to be returned.
+ *
+ * \retval thresholdCrossedStatus A 32-bit value:
+ * If "word" is 0, bit i is 1 if normal rec fifo i is
+ * in use and its threshold has been crossed.
+ * If "word" is 1, bits 7, 15, 23, 31 are 1 if header
+ * rec fifos for groups 1, 2, 3, 4 respectively are in
+ * use and their threshold has been crossed.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetThresholdCrossed(
+ DMA_RecFifoGroup_t *fg_ptr,
+ int word
+ )
+{
+ SPI_assert( fg_ptr != NULL );
+ SPI_assert( fg_ptr->status_ptr != NULL );
+ SPI_assert( (word == 0) || (word == 1) );
+
+ return ( fg_ptr->status_ptr->threshold_crossed[word] & fg_ptr->mask );
+}
+
+
+/*!
+ * \brief Set the "Clear Threshold Crossed" Status of Specified Fifos in a
+ * Reception Fifo Group
+ *
+ * Set the "clear threshold crossed" status of the specified reception fifos
+ * in the specified "clear threshold crossed" word.
+ *
+ * \param[in] fg_ptr Pointer to the reception fifo group structure
+ * \param[in] clr 32-bit value, specifying which fifos are to have
+ * their "clear threshold crossed" status set.
+ * If "word" is 0, bit i is 1 if normal rec fifo i is
+ * to have its "clear threshold crossed" status set.
+ * If "word" is 1, one of bits 7, 15, 23, 31 is 1 if
+ * header fifo for group 1, 2, 3, 4 respectively is to
+ * have its "clear threshold crossed" status set.
+ * Fifos that are not in the group will not have their
+ * status set.
+ * \param[in] word The word (0 or 1) of the "clear threshold crossed"
+ * status to be set.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the status to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur. 
+*/ +__INLINE__ void DMA_RecFifoSetClearThresholdCrossed( + DMA_RecFifoGroup_t *fg_ptr, + unsigned int clr, + int word + ) +{ + SPI_assert( fg_ptr != NULL ); + SPI_assert( fg_ptr->status_ptr != NULL ); + SPI_assert( (word == 0) || (word == 1) ); + + fg_ptr->status_ptr->clear_threshold_crossed[word] = clr & fg_ptr->mask; + + _bgp_mbar(); + +} + + +/* + * ----------------------------------------------------------------------------- + * Calls to access the Fifo, given a fifo group and a fifo ID + * ----------------------------------------------------------------------------- + */ + + + + +/*! + * \brief DMA RecFifo Initialization By Id + * + * - For a DMA reception fifo, initialize its start, head, tail, and end. + * - Compute fifo size and free space. + * + * \param[in] fg_ptr Pointer to fifo group structure. + * \param[in] fifo_id Id of the fifo to be initialized + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * \param[in] va_start Virtual address of the start of the fifo. + * \param[in] va_head Virtual address of the head of the fifo (typically + * equal to va_start). + * \param[in] va_end Virtual address of the end of the fifo. + * + * \retval 0 Successful. + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h. + * _bgp_err_dma_rfifo_map_twice means this fifo has + * already been initialized + * + */ +__INLINE__ int DMA_RecFifoInitById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id, + void *va_start, + void *va_head, + void *va_end + ) +{ + int rc; + + SPI_assert( fg_ptr != NULL ); + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( ( (uint32_t) va_end - (uint32_t)va_start ) >= + DMA_MIN_REC_FIFO_SIZE_IN_BYTES ); + + /* + * Initialize the fifo by invoking a system call. + */ + + rc = Kernel_RecFifoInitById( + (uint32_t*)fg_ptr, + fifo_id, + va_start, + va_head, + va_end); + + return rc; +} + + +/*! 
+ * \brief Get DMA RecFifo Start Pointer from the Shadow Using a Fifo Id + * + * Get a DMA reception fifo's start pointer, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * + * \retval va_start The virtual address of the start of the fifo + * + */ +__INLINE__ void * DMA_RecFifoGetStartById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + return DMA_FifoGetStartFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo ); +} + + +/*! + * \brief Get DMA RecFifo Head Pointer Using a Fifo Id + * + * Get a DMA reception fifo's head pointer, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * + * \retval va_head The virtual address of the head of the fifo + * + */ +__INLINE__ void * DMA_RecFifoGetHeadById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + return DMA_FifoGetHead( &fg_ptr->fifos[fifo_id].dma_fifo ); +} + + +/*! + * \brief Get DMA RecFifo Tail Pointer Using a Fifo Id + * + * Get a DMA reception fifo's tail pointer, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * + * \retval va_tail The virtual address of the tail of the fifo + * + */ +__INLINE__ void *DMA_RecFifoGetTailById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + return DMA_FifoGetTail( &fg_ptr->fifos[fifo_id].dma_fifo ); +} + + +/*! 
+ * \brief Get DMA RecFifo End Pointer from the Shadow Using a Fifo Id + * + * Get a DMA reception fifo's end pointer, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * + * \retval va_end The virtual address of the end of the fifo + * + */ +__INLINE__ void *DMA_RecFifoGetEndById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + return DMA_FifoGetEndFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo ); +} + + +/*! + * \brief Get DMA RecFifo Size Using a Fifo Id + * + * Get a DMA reception fifo's size, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * + * \retval size The size of the DMA fifo, in units of 16B quads. + * + */ +__INLINE__ unsigned int DMA_RecFifoGetSizeById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + return DMA_FifoGetSize( &fg_ptr->fifos[fifo_id].dma_fifo ); +} + + +/*! + * \brief Get DMA RecFifo Free Space Using a Fifo Id + * + * Get a DMA reception fifo's free space, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * \param[in] read_head Indicates whether to read the head from the hardware + * fifo before calculating the free space. + * - 1 means to read the hardware head + * - 0 means to use the current head shadow + * \param[in] read_tail Indicates whether to read the tail from the hardware + * fifo before calculating the free space. 
+ * - 1 means to read the hardware tail + * - 0 means to use the current tail shadow + * + * \retval freeSpace The amount of free space in the fifo, in units of + * 16B quads. + * + */ +__INLINE__ unsigned int DMA_RecFifoGetFreeSpaceById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id, + unsigned int read_head, + unsigned int read_tail + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + return DMA_FifoGetFreeSpace( &fg_ptr->fifos[fifo_id].dma_fifo, + read_head, + read_tail ); +} + + +/*! + * \brief Set DMA RecFifo Head Pointer Using a Fifo Id + * + * Set a DMA reception fifo's head pointer, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * \param[in] va_head The virtual address of the head of the fifo. + * + * \return None + * + */ +__INLINE__ void DMA_RecFifoSetHeadById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id, + void *va_head + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + DMA_FifoSetHead( &fg_ptr->fifos[fifo_id].dma_fifo, + va_head); +} + + +/*! + * \brief Set DMA RecFifo Tail Pointer Using a Fifo Id + * + * Set a DMA reception fifo's tail pointer, given a fifo group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * \param[in] va_tail The virtual address of the tail of the fifo. + * + * \return None + * + */ +__INLINE__ void DMA_RecFifoSetTailById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id, + void *va_tail + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + DMA_FifoSetTail( &fg_ptr->fifos[fifo_id].dma_fifo, + va_tail); +} + + +/*! 
+ * \brief Increment DMA RecFifo Head Pointer Using a Fifo Id + * + * Increment a DMA reception fifo's head pointer, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * \param[in] incr The number of quads (16 byte units) to increment the + * head pointer by. + * + * \return None + * +*/ +__INLINE__ void DMA_RecFifoIncrementHeadById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id, + unsigned int incr + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + DMA_RecFifoIncrementHead( &fg_ptr->fifos[fifo_id], + incr); +} + + +/*! + * \brief Get DMA RecFifo Not Empty Status Using a Fifo Id + * + * Get a DMA reception fifo's not empty status, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * + * \retval 0 The specified fifo is empty. + * non-zero The specified fifo is not empty. + * + */ +__INLINE__ unsigned DMA_RecFifoGetNotEmptyById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + { + int word = DMA_REC_FIFO_GROUP_WORD_ID(fg_ptr->fifos[fifo_id].global_fifo_id); + + unsigned status; + status = DMA_RecFifoGetNotEmpty(fg_ptr, + word); + if ( word == 0 ) + { + /* If normal fifo, mask with the correct bit number. */ + status = status & _BN(fg_ptr->fifos[fifo_id].global_fifo_id); + } + /* For header fifo, don't need additional mask because the status word was + * already masked by the 8 bits for this group, leaving only the 1 bit for + * the header fifo. + */ + + return status; + } + +} + + +/*! 
+ * \brief Get DMA RecFifo Available Status Using a Fifo Id + * + * Get a DMA reception fifo's available status, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * + * \retval 0 The specified fifo is not available. + * non-zero The specified fifo is available. + * + */ +__INLINE__ unsigned DMA_RecFifoGetAvailableById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + { + int word = DMA_REC_FIFO_GROUP_WORD_ID(fg_ptr->fifos[fifo_id].global_fifo_id); + + unsigned status; + status = DMA_RecFifoGetAvailable(fg_ptr, + word); + if ( word == 0 ) + { + /* If normal fifo, mask with the correct bit number. */ + status = status & _BN(fg_ptr->fifos[fifo_id].global_fifo_id); + } + /* For header fifo, don't need additional mask because the status word was + * already masked by the 8 bits for this group, leaving only the 1 bit for + * the header fifo. + */ + + return status; + } + +} + + +/*! + * \brief Get DMA RecFifo Threshold Crossed Status Using a Fifo Id + * + * Get a DMA reception fifo's threshold crossed status, given a fifo group and + * fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). 
+ * + * \retval 0 The specified fifo has not had its threshold crossed + * non-zero The specified fifo has had its threshold crossed + * + */ +__INLINE__ unsigned DMA_RecFifoGetThresholdCrossedById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + { + int word = DMA_REC_FIFO_GROUP_WORD_ID(fg_ptr->fifos[fifo_id].global_fifo_id); + + unsigned status; + status = DMA_RecFifoGetThresholdCrossed(fg_ptr, + word); + if ( word == 0 ) + { + /* If normal fifo, mask with the correct bit number. */ + status = status & _BN(fg_ptr->fifos[fifo_id].global_fifo_id); + } + /* For header fifo, don't need additional mask because the status word was + * already masked by the 8 bits for this group, leaving only the 1 bit for + * the header fifo. + */ + + return status; + } + +} + + +/*! + * \brief Set DMA RecFifo Clear Threshold Crossed Status Using a Fifo Id + * + * Set a DMA reception fifo's "clear threshold crossed" status, given a fifo + * group and fifo id. + * + * \param[in] fg_ptr Pointer to the fifo group structure + * \param[in] fifo_id Id of the fifo within the group + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * + * \return None + * + */ +__INLINE__ void DMA_RecFifoSetClearThresholdCrossedById( + DMA_RecFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + unsigned int clr; + int word; + + SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) ); + SPI_assert( fg_ptr != NULL ); + + word = DMA_REC_FIFO_GROUP_WORD_ID(fg_ptr->fifos[fifo_id].global_fifo_id); + + if ( word == 0 ) + { + /* If normal fifo, mask with the correct bit number so we only specify the + * bit corresponding to this normal fifo. + * Note: The fg_ptr->mask shouldn't be necessary, but it is a bit safer. 
+ */ + clr = ( _BN(fg_ptr->fifos[fifo_id].global_fifo_id) & fg_ptr->mask); + } + else + { + /* If header fifo, it is ok to just clear all of the mask bits for this + * group, since only 1 bit is used inside the DMA. + */ + clr = fg_ptr->mask; + } + + DMA_RecFifoSetClearThresholdCrossed(fg_ptr, + clr, + word); /* Write to the DMA SRAM */ +} + + +/*! + * \brief Register a Reception Fifo Receive Function + * + * Register a specified receive function to handle packets having a specific + * "registration ID". It returns a registration ID (0-255) that is to be used + * in the packet header Func_Id field, such that packets that arrive in a + * reception fifo will result in the corresponding receive function being called + * when that fifo is processed by a polling or interrupt handler function. + * + * \param[in] recv_func Pointer to the receive function. + * \param[in] recv_func_parm Arbitrary pointer to be passed to the + * recv_func when it is called. + * \param[in] is_error_function 1 means this is the receiver function + * to be called if a packet contains an invalid + * (unregistered) registration ID. The return + * value from this function is zero, indicating + * success, not indicating a registration ID. + * A default function is provided if one is not + * registered. If there is already a non-default + * error receive function registered, -EBUSY is + * returned. + * 0 means this is not the error receiver + * function. + * \param[in] is_header_fifo Indicates whether the fifo is normal or + * header: + * - 0 is normal. The return code is the + * registration ID. + * - 1 is header. The return code is 0, + * indicating success, because packets in + * header fifos are direct-put packets, and + * hence have no registration ID. + * If there is already a header receive function + * registered, -EBUSY is returned. + * + * If both is_error_function and is_header_fifo are 1, -EINVAL is returned. 
+ * + * \retval 0 This is a registration ID if is_error_function=0 and + * is_header_fifo=0. Otherwise, it indicates success. + * 1-255 This is a registration ID. Successful. + * negative Failure. This is a negative errno value. + */ +int DMA_RecFifoRegisterRecvFunction( + DMA_RecFifoRecvFunction_t recv_func, + void *recv_func_parm, + int is_error_function, + int is_header_fifo + ); + + +/*! + * \brief De-Register a Reception Fifo Receive Function + * + * De-register a previously registered receive function. + * + * \param[in] registrationId Registration Id returned from + * DMA_RecFifoRegisterRecvFunction (0..255). + * A negative value means that no + * registration id is specified. + * \param[in] is_error_function 1 means the error receive function is + * to be de-registered. + * 0 otherwise. + * \param[in] is_header_fifo 1 means the header fifo receive function is + * to be de-registered. + * 0 otherwise. + * + * \retval 0 Success + * negative Error value + * + * \see DMA_RecFifoRegisterRecvFunction + */ +int DMA_RecFifoDeRegisterRecvFunction( + int registrationId, + int is_error_function, + int is_header_fifo + ); + + +/*! + * \brief Poll Normal Reception Fifos + * + * Poll the "normal" reception fifos in the specified fifo group, removing one + * packet after another from the fifos, dispatching the appropriate receive + * function for each packet, until one of the following occurs: + * 1. Total_packets packets are received + * 2. All the fifos are empty + * 3. A receive function returns a non-zero value + * 4. The last packet removed from a fifo has an invalid registration id. The + * error receive function will have been called, but polling ends. + * The invalid packet is counted as a processed packet, and the return + * code from the error receive function is returned. + * + * Polling occurs in a round-robin fashion through the array of normal fifos in + * the group, beginning with array index starting_index. 
If a fifo has a packet,
+ * the appropriate receive function is called. Upon return, the packet is
+ * removed from the fifo (the fifo head is moved past the packet).
+ *
+ * After processing packets_per_fifo packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed. When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] total_packets The maximum number of packets that will be
+ * processed.
+ * \param[in] packets_per_fifo The maximum number of packets that will be + * processed in a given fifo before switching + * to the next fifo. + * \param[in] starting_index The fifos in the fifo group are maintained + * in an array. This is the array index of the + * first fifo to be processed (0 through + * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1). + * \param[in] num_empty_passes The number of passes over the normal fifos + * while they are empty that this function + * should tolerate before giving up and + * returning. This is an optimization + * to catch late arriving packets. + * \param[in] not_empty_poll_delay The number of pclks to delay between polls + * of the not-empty status when the fifos are + * empty. + * \param[in] fg_ptr Pointer to the fifo group. + * \param[out] next_fifo_index Pointer to an int where the recommended + * starting_index for the next call is returned. + * + * \retval num_packets_received The number of packets received and processed. + * next_fifo_index is set. + * \retval negative_value The return code from the receive function that + * caused polling to end. next_fifo_index is + * set. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * \todo By setting fg_ptr->interrupt_lock? or by calling + * the system call to disable a class of interrupts? + * + * \note next_fifo_index is set to the index of the fifo that had the last + * packet received if all packets_per_fifo packets were not received from + * that fifo. However, if all packets_per_fifo packets were received + * from that fifo, the index of the next fifo will be returned. + * + */ +int DMA_RecFifoPollNormalFifos(int total_packets, + int packets_per_fifo, + int starting_index, + int num_empty_passes, + int not_empty_poll_delay, + DMA_RecFifoGroup_t *fg_ptr, + int *next_fifo_index + ); + + +/*! 
+ * \brief Simple Poll Normal Reception Fifos + * + * Poll the "normal" reception fifos in the specified fifo group, removing one + * packet after another from the fifos, dispatching the appropriate receive + * function for each packet, until one of the following occurs: + * 1. All packets in all of the fifos have been received. + * 2. A receive function returns a non-zero value. + * 3. The last packet removed from a fifo has an invalid registration id. The + * error receive function will have been called, but polling ends. + * The invalid packet is counted as a processed packet, and the return + * code from the error receive function is returned. + * 4. There have been fruitfulPollLimit polls attempted (summed across all + * fifos). + * + * Polling occurs in a round-robin fashion through the array of normal fifos in + * the group. If a fifo has a packet, the appropriate receive function is + * called. Upon return, the packet is removed from the fifo (the fifo head is + * moved past the packet). + * + * After processing all of the packets in a fifo (or emptying that fifo), + * the next fifo in the group is processed. When the last index in the fifo + * array is processed, processing continues with the first fifo in the array. + * Multiple loops through the array of fifos in the group may occur until all + * fifos are empty or fruitfulPollLimit polls have been completed. + * + * It is risky to set the fruitfulPollLimit to zero, allowing this function to + * poll indefinitely as long as there are packets to be processed. This may + * starve the node in a scenario where other nodes send "polling" packets to + * our node, and our node never gets a chance to do anything else except + * process those polling packets. + * + * The receive functions must be registered through the + * DMA_RecFifoRegisterRecvFunction interface. The receive function is + * called with a pointer to the packet header, pointer to the payload, and + * length of the payload. 
The packet header is always be 16 bytes of + * contiguous storage, in the fifo. Because the fifo is a circular buffer, + * the payload of a packet may wrap from the end of the fifo to the beginning. + * For large fifos, this happens infrequently. To make it easier for + * user/messaging code, the poll function will always return a starting payload + * address and number of bytes so that the receive function can treat the packet + * as contiguous storage in memory. If the packet does not wrap, the starting + * payload address will be a pointer to the appropriate address in the fifo. + * If the packet does wrap, the poll function will copy bytes from the fifo to + * a contiguous buffer (on the stack) and call the receive function with a + * payload pointer pointing to this temporary buffer. In either case, when the + * receive function returns, user code cannot assume that the payload buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function would + * have to copy it to some other location. The packet header and payload are + * 16-byte aligned for optimized copying. + * + * \param[in] fg_ptr Pointer to the fifo group. + * \param[in] fruitfulPollLimit The limit on the number of fruitful polls that + * will be attempted (summed across all fifos). + * If the limit is reached, this function + * returns. A value of zero means there is no + * limit imposed. A fruitful poll is one where + * at least one packet has arrived in the fifo + * since the last poll. + * + * \retval num_packets_received The number of packets received and processed. + + * \retval negative_value The return code from the receive function that + * caused polling to end. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * + */ +int DMA_RecFifoSimplePollNormalFifos( DMA_RecFifoGroup_t *fg_ptr, + int fruitfulPollLimit); + +/*! 
+ * \brief Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1. num_packets packets are received
+ * 2. The specified fifo is empty
+ * 3. A receive function returns a non-zero value
+ * 4. The last packet removed from the fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to + * a contiguous buffer (on the stack) and call the receive function with a + * payload pointer pointing to this temporary buffer. In either case, when the + * receive function returns, user code cannot assume that the payload buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function would + * have to copy it to some other location. The packet header and payload are + * 16-byte aligned for optimized copying. + * + * \param[in] num_packets The maximum number of packets that will be + * processed. + * \param[in] fifo_id The ID of the fifo to be polled. + * (0 through + * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1). + * \param[in] num_empty_passes The number of passes over the fifo + * while it is empty that this function + * should tolerate before giving up and + * returning. This is an optimization + * to catch late arriving packets. + * \param[in] not_empty_poll_delay The number of pclks to delay between polls + * of the not-empty status when the fifos are + * empty. + * \param[in] fg_ptr Pointer to the fifo group. + * + * \param[in] empty_callback Function to call when spinning because the FIFO looks empty. + * + * \retval num_packets_received The number of packets received and processed. + * \retval negative_value The return code from the receive function that + * caused polling to end. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * \todo By setting fg_ptr->interrupt_lock? or by calling + * the system call to disable a class of interrupts? + * + */ +int DMA_RecFifoPollNormalFifoById( int num_packets, + int fifo_id, + int num_empty_passes, + int not_empty_poll_delay, + DMA_RecFifoGroup_t *fg_ptr, + void (*empty_callback)(void) + ); + + +/*! 
+ * \brief Simple Poll Normal Reception Fifo Given a Fifo Group and Fifo ID + * + * Poll the specified "normal" reception fifo in the specified fifo group, + * removing one packet after another from the fifo, dispatching the appropriate + * receive function for each packet, until one of the following occurs: + * 1. All packets in the fifo have been received. + * 2. The specified fifo is empty. + * 3. A receive function returns a non-zero value. + * 4. The last packet removed from the fifo has an invalid registration id. The + * error receive function will have been called, but polling ends. + * The invalid packet is counted as a processed packet, and the return + * code from the error receive function is returned. + * 5. There have been fruitfulPollLimit polls attempted. + * + * If the specified fifo has a packet, the appropriate receive function is + * called. Upon return, the packet is removed from the fifo (the fifo head is + * moved past the packet). + * + * After processing all of the packets in the fifo (emptying that fifo), + * or the fruitfulPollLimit has been reached, the function returns the number + * of packets processed. + * + * It is risky to set the fruitfulPollLimit to zero, allowing this function to + * poll indefinitely as long as there are packets to be processed. This may + * starve the node in a scenario where other nodes send "polling" packets to + * our node, and our node never gets a chance to do anything else except + * process those polling packets. + * + * The receive functions must be registered through the + * DMA_RecFifoRegisterRecvFunction interface. The receive function is + * called with a pointer to the packet header, pointer to the payload, and + * length of the payload. The packet header is always be 16 bytes of + * contiguous storage, in the fifo. Because the fifo is a circular buffer, + * the payload of a packet may wrap from the end of the fifo to the beginning. + * For large fifos, this happens infrequently. 
To make it easier for + * user/messaging code, the poll function will always pass a starting payload + * address and number of bytes so that the receive function can treat the packet + * as contiguous storage in memory. If the packet does not wrap, the starting + * payload address will be a pointer to the appropriate address in the fifo. + * If the packet does wrap, the poll function will copy bytes from the fifo to + * a contiguous buffer (on the stack) and call the receive function with a + * payload pointer pointing to this temporary buffer. In either case, when the + * receive function returns, user code cannot assume that the payload buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function has + * to copy it to some other location. The packet header and payload are + * 16-byte aligned for optimized copying. + * + * \param[in] fifo_id The ID of the fifo to be polled. + * (0 through + * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1). + * \param[in] fg_ptr Pointer to the fifo group. + * \param[in] fruitfulPollLimit The limit on the number of fruitful polls that + * will be attempted. + * If the limit is reached, this function + * returns. A value of zero means there is no + * limit imposed. A fruitful poll is one where + * at least one packet has arrived in the fifo + * since the last poll. + * + * \retval num_packets_received The number of packets received and processed. + * \retval negative_value The return code from the receive function that + * caused polling to end. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * + */ +int DMA_RecFifoSimplePollNormalFifoById( int fifo_id, + DMA_RecFifoGroup_t *fg_ptr, + int fruitfulPollLimit + ); + + + +/*! 
+ * \brief Poll Header Reception Fifo Given a Fifo Group + * + * Poll the "header" reception fifo in the specified fifo group, + * removing one packet after another from the fifo, dispatching the appropriate + * receive function for each packet, until one of the following occurs: + * 1. Total_packets packets are received + * 2. The specified fifo is empty + * 3. A receive function returns a non-zero value + * + * If the header fifo has a packet, the appropriate receive function is + * called. Upon return, the packet is removed from the fifo (the fifo head is + * moved past the packet). + * + * After processing num_packets packets in the fifo (or emptying that fifo), + * the function returns the number of packets processed. + * + * The receive function must be registered through the + * DMA_RecFifoRegisterRecvFunction interface. The receive function is + * called with a pointer to the packet header. The packet header is always + * 16 bytes of contiguous storage, in the fifo. When the + * receive function returns, user code cannot assume that the buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function would + * have to copy it to some other location. The packet header is 16-byte aligned + * for optimized copying. + * + * \param[in] num_packets The maximum number of packets that will be + * processed. + * \param[in] num_empty_passes The number of passes over the fifo + * while it is empty that this function + * should tolerate before giving up and + * returning. This is an optimization + * to catch late arriving packets. + * \param[in] not_empty_poll_delay The number of pclks to delay between polls + * of the not-empty status when the fifos are + * empty. + * \param[in] fg_ptr Pointer to the fifo group. + * + * \retval num_packets_received The number of packets received and processed. 
+ * \retval negative_value The return code from the receive function that + * caused polling to end. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * + */ +int DMA_RecFifoPollHeaderFifo( int num_packets, + int num_empty_passes, + int not_empty_poll_delay, + DMA_RecFifoGroup_t *fg_ptr + ); + + + +__END_DECLS + + +#endif diff --git a/arch/powerpc/include/spi/bpcore_interface.h b/arch/powerpc/include/spi/bpcore_interface.h new file mode 100644 index 00000000000000..59e0ee2a22d3bf --- /dev/null +++ b/arch/powerpc/include/spi/bpcore_interface.h @@ -0,0 +1,41 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + ********************************************************************/ +/** + * \file spi/bpcore_interface.h + */ +#ifndef _BGP_BPCORE_INT_H_ /* Prevent multiple inclusion */ +#define _BGP_BPCORE_INT_H_ + +#define _BGP_UA_SCRATCH (0x4) /* eDRAM Scratch: 0 to 8MB */ +#define _BGP_PA_SCRATCH (0x00000000) +#define _BGP_PS_SCRATCH (8 * 1024 * 1024) +#define _BGP_PM_SCRATCH (0x007FFFFF) + +/* ************************************************************************* */ +/* DMA Non-Fatal Interrupt Request: Group 3 bits 00:31 */ +/* ************************************************************************* */ + +#define _BGP_IC_DMA_NFT_G3_HIER_POS 3 +#define _BGP_IC_DMA_NFT_G3_UNIT_NUM 3 +#define _BGP_IC_DMA_NFT_G3_UNIT_POS 0 +#define _BGP_IC_DMA_NFT_G3_UNIT_SIZE 32 +#define _BGP_IC_DMA_NFT_G3_UNIT_MASK 0xffffffff + + +#endif /* Add nothing below this line */ diff --git a/arch/powerpc/include/spi/kernel_interface.h b/arch/powerpc/include/spi/kernel_interface.h new file mode 100644 index 00000000000000..05ddd30ffd6cb1 --- /dev/null +++ b/arch/powerpc/include/spi/kernel_interface.h @@ -0,0 +1,1982 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + ********************************************************************/ +/** + * \file spi/kernel_interface.h + */ + +#ifndef _BGP_VIRT2PHYS_H_ /* Prevent multiple inclusion */ +#define _BGP_VIRT2PHYS_H_ + + +#if defined(__LINUX__) || defined(__LINUX_KERNEL__) + +#include <spi/linux_interface.h> + +/* #warning Using LINUX kernel interface for SPI */ + +#else + +#warning Using CNK kernel interface for SPI +#error Should not be using CNK interface, this is in the Linux kernel tree + + +#include <common/namespace.h> + +__BEGIN_DECLS + +#include <common/linkage.h> +#include <bpcore/bgp_types.h> +#include <bpcore/ppc450_core.h> +#include <bpcore/ppc450_inlines.h> +#include <spi/bpcore_interface.h> +#include <spi/bgp_kernel_inlines.h> +#include <common/bgp_ras.h> +#include <cnk/VirtualMap.h> +#include <cnk/vmm.h> +#include <cnk/bgp_SPRG_Usage.h> +#include <cnk/bgp_SysCall_Extensions.h> +#include <fcntl.h> +#include <limits.h> +#include <string.h> +#include <errno.h> + + +#if ((!defined(__CNK__)) && (!defined(__BL__))) +#include <pthread.h> +#endif + +#ifndef __INLINE__ +#define __INLINE__ extern inline +#endif + + + +/*! + * \brief Communication Thread interrupt handler function prototype + * + * \param[in] arg1 1st argument to commthread + * \param[in] arg2 2nd argument to commthread + * \param[in] arg3 3rd argument to commthread + */ +typedef void (*Kernel_CommThreadHandler)(uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); + +/*! + * \brief Interrupt Group Prototype + * + * This data type is used to group interrupts of various devices together + * so they can be enabled or disabled simultaneously. A given interrupt user + * (eg. messaging, QCD, etc) specifies a value of this data type when its + * interrupt resources are allocated. The kernel associates those resources + * with the specified value so when this value is specified on the enable or + * disable interupts system call, all of the interrupts in the group are + * operated upon. 
Examples of devices that can be grouped in this way include + * DMA fifos, torus, tree, etc. + * + * \todo The kernel should provide interfaces to allocate a + * Kernel_InterruptGroup_t and deallocate it. + */ +typedef void * Kernel_InterruptGroup_t; + + + + + +/*! \brief Returns the number of Processes (Virtual Nodes) running on this Physical Node. + * + * \return Process Count + * \retval 1 Running in Single Process "SMP Mode" + * \retval 2 Running in "2 Virtual Node Mode" + * \retval 3 Running in "3 Virtual Node Mode" + * \retval 4 Running in "4 Virtual Node Mode" + */ +/* __INLINE__ int Kernel_ProcessCount( void ) +{ + uint32_t shm = _bgp_mfspr( _BGP_SPRGRO_SHMem ); + + return( (shm & 0x3) + 1 ); +} +*/ +/*! \brief Returns the number of Processors (cores) running in this Process (Virtual Node) + * + * \return Processor Count + * \retval 1 Single Processor in this Process (usually 4-VN Mode). + * \retval 2 Two Processors in this Process (usually 2-VN Mode). + * \retval 3 Three Processors in this Process. + * \retval 4 Four Processors in this Process (usually SMP Mode). 
+ */ +/* __INLINE__ int Kernel_ProcessorCount( void ) +{ + uint32_t shm = _bgp_mfspr( _BGP_SPRGRO_SHMem ); + + return( ((shm & 0xC) >> 2) + 1 ); +} +*/ +__INLINE__ int Kernel_GetAppSegmentCount(uint32_t* count) +{ + _BGP_SprgShMem shm; + + shm.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem); + if(shm.IsStaticMap) + { + if(Kernel_ProcessCount() == 1) + { + *count = 3; /* text/rodata, data, heap */ + } + else + { + *count = 4; /* text/rodata, data, heap, shared (in dual/vn) */ + } + } + else + { + if(Kernel_ProcessCount() == 1) + { + *count = 2; /* text/rodata, data/heap */ + } + else + { + *count = 3; /* text/rodata, data/heap, shared (in dual/vn) */ + } + } + return 0; +} + +__INLINE__ int Kernel_GetAppSegmentMapping(uint32_t segmentID, uint32_t coreID, uint32_t* va, uint64_t* pa, uint32_t* length) +{ + int rc = 0; + _BGP_SprgShMem shm; + shm.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem); + if((!shm.IsStaticMap)&&(segmentID > 1)) + segmentID++; + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_GETAPPSEGMENTMAPPING), + "r" (segmentID), + "r" (coreID), + "r" (va), + "r" (pa), + "r" (length) + : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" ); + return rc; +} + +extern int KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized; +extern uint32_t KERNEL_VIRTUAL2PHYSICAL_segcnt; + +#define KERNEL_V2P_MAXSEGMENTS 5 +extern uint32_t KERNEL_VIRTUAL2PHYSICAL_segva[KERNEL_V2P_MAXSEGMENTS]; +extern uint64_t KERNEL_VIRTUAL2PHYSICAL_segpa[KERNEL_V2P_MAXSEGMENTS]; +extern size_t KERNEL_VIRTUAL2PHYSICAL_segsz[KERNEL_V2P_MAXSEGMENTS]; +#undef KERNEL_V2P_MAXSEGMENTS + +/*! \brief Translate a 32bit Virtual Address to a 36bit Physical Address, returning separated upper and lower parts. 
+ * + * \param[in] pVA 32bit virtual address in the calling process + * \param[in] vsize size in bytes of the virtual range + * \param[out] ua_out upper 4 physical address bits + * \param[out] pa_out lower 32 physical address bits + * \return Error condition for translation + * \retval 0 Successful translation, with ua_out and pa_out filled in + * \retval -1 Invalid Virtual Address for this process, ua_out and pa_out unmodified. + * \retval -2 The range from vaddr to (vaddr+vsize) is not physically contiguous. + * \retval -3 vaddr in Scratch, but no Scratch, or not enough Scratch, is enabled. + * \retval -4 invalid parameter + * + * \warning Supports only Text, Data, Stack, and (optional) eDRAM Scratch translation + * \warning CNK "pagesize" is 1MB. + * \warning Text and Data are virtually contiguous, but not necessarily physically contiguous. + * \todo Does not (currently) support > 4GB DDR space. + * \todo Does not (currently) support Shared Memory Area. + */ +__INLINE__ int Kernel_Virtual2Physical( void *pVA, /* input: 32bit Virtual start address */ + size_t vsize, /* input: size in bytes of virtual range */ + uint32_t *ua_out, /* output: upper 4 Physical Address bits */ + uint32_t *pa_out ) /* output: lower 32 Physical Address bits */ +{ + _BGP_SprgShMem shmem; + + shmem.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem); + if(shmem.IsStaticMap) + { + uint32_t x; + + if(KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized == 0) + { + Kernel_GetAppSegmentCount(&KERNEL_VIRTUAL2PHYSICAL_segcnt); + for(x=0; x<KERNEL_VIRTUAL2PHYSICAL_segcnt; x++) + { + if(Kernel_GetAppSegmentMapping(x, Kernel_PhysicalProcessorID(), &KERNEL_VIRTUAL2PHYSICAL_segva[x], &KERNEL_VIRTUAL2PHYSICAL_segpa[x], &KERNEL_VIRTUAL2PHYSICAL_segsz[x])) + return -1; + } + KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized = 1; + } + for(x=0; x<KERNEL_VIRTUAL2PHYSICAL_segcnt; x++) + { + if(((uint32_t)pVA >= KERNEL_VIRTUAL2PHYSICAL_segva[x]) && (KERNEL_VIRTUAL2PHYSICAL_segsz[x] > (uint32_t)pVA - 
KERNEL_VIRTUAL2PHYSICAL_segva[x] + vsize) && ((uint32_t)pVA + vsize > (uint32_t)pVA)) + { + *ua_out = (uint32_t)((KERNEL_VIRTUAL2PHYSICAL_segpa[x] + ((uint32_t)pVA-KERNEL_VIRTUAL2PHYSICAL_segva[x])) >> 32); + *pa_out = (uint32_t)((KERNEL_VIRTUAL2PHYSICAL_segpa[x] + ((uint32_t)pVA-KERNEL_VIRTUAL2PHYSICAL_segva[x]))&0xffffffff); + return 0; + } + } + return -1; + } + + uint32_t vaddr = (uint32_t)pVA; + uint32_t texti = _bgp_mfspr( _BGP_SPRGRO_TextI ); + uint32_t datai = _bgp_mfspr( _BGP_SPRGRO_DataI ); + uint32_t dst2 = _bgp_mfspr( _BGP_SPRGRO_DST2 ); + uint32_t shm = (_bgp_mfspr( _BGP_SPRGRO_SHMem ) & 0xFFFFFFC0); + uint32_t text_v_start = (texti & 0xFFF00000); + uint32_t data_v_start = (datai & 0xFFF00000); /* text_v_limit is (data_v_start - 1) */ + uint32_t text_ua = ((texti & 0x000000C0) >> 6); + uint32_t text_p_start = ((texti & 0x000FFF00) << 12); + uint32_t data_ua = ((datai & 0x000000C0) >> 6); + uint32_t data_p_start = ((datai & 0x000FFF00) << 12); + uint32_t data_v_size = (dst2 & 0xFFF00000); + uint32_t data_v_limit = (data_v_start + data_v_size + _BGP_VMM_PAGE_MASK); + uint32_t vend = (vaddr + vsize - 1); + uint32_t vpage = (vaddr & ~_BGP_VMM_PAGE_MASK); /* which 1MB page? */ + uint32_t voffset = (vaddr & _BGP_VMM_PAGE_MASK); /* offset within 1MB page */ + + /* printf("V2P: texti=0x%08x, datai=0x%08x, dst2=0x%08x\n", texti, datai, dst2 ); */ + /* printf("V2P: vaddr=0x%08x, vend=0x%08x, text_v_start=0x%08x, data_v_limit=0x%08x\n", */ + /* vaddr, vend, text_v_start, data_v_limit ); */ + + /* parm check */ + if ( !vsize || !ua_out || !pa_out ) + return(-4); + + /* range check: below text or off end of data, or in eDRAM Scratch */ + if ( (vaddr < text_v_start) || (vend > data_v_limit) ) + { + /* Scratch? 
*/ + if ( vaddr >= _BGP_VA_SCRATCH ) + { + uint32_t scratchMB = ((dst2 & 0x00000078) << (20-3)); + uint32_t scratch_end = (_BGP_VA_SCRATCH + scratchMB); + + if ( !scratchMB || (vend > scratch_end) ) + return(-3); + + *ua_out = (uint32_t)_BGP_UA_SCRATCH; + *pa_out = (vaddr & _BGP_VM_SCRATCH); + return(0); + } + else if ( shm ) /* Shared Memory? If any, always mapped V=R. */ + { + uint32_t shm_v_start = (shm & 0xFFF00000); + uint32_t shm_v_end = (shm_v_start + ((shm & 0x000FFF00) << 12)); + uint32_t shm_ua = ((shm & 0x000000C0) >> 6); + + if ( (vaddr >= shm_v_start) && (vend <= shm_v_end) ) + { + *ua_out = shm_ua; + *pa_out = vaddr; + return(0); + } + } + + return(-1); + } + + /* Text? (includes Read-Only Data) */ + if ( vaddr < data_v_start ) + { + /* if range starts in Text but ends in Data, then discontiguous */ + if ( vend >= data_v_start ) + return(-2); + + *ua_out = text_ua; + *pa_out = (text_p_start + (vpage - text_v_start) + voffset); + + return(0); + } + + /* Data */ + *ua_out = data_ua; + *pa_out = (data_p_start + (vpage - data_v_start) + voffset); + + return(0); +} + + +/*! \brief Returns a copy of the node's personality + * + * \param[out] personality Location of personality structure that will be filled in by Kernel_GetPersonality + * \param[in] size Size, in bytes, that was allocated to hold the personality structure + * \return Error indication + * \retval 0 Success + * \retval -1 Invalid parameters + */ +__INLINE__ int Kernel_GetPersonality(_BGP_Personality_t* personality, size_t size) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + asm __volatile__ ("li 0,%3;" + "mr 3,%1;" + "mr 4,%2;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "r" (personality), + "r" (size), + "i" (_BGP_SYSCALL_NR_GET_PERSONALITY) + : "r0", "r3", "r4", "cc", "memory" ); + + return( rc ); +} + +/*! 
\brief Starts to checkpoint/restore the Kernel data structures for CNK + * + * \param[in] component The kernel component to be checkpointed or restored + * \param[in] operation The type of operation that the kernel needs to provide (e.g. CHECKPOINT_START, CHECKPOINT_RESTART, CHECKPOINT_COMPLETE) + * \param[in] buffer Buffer holding (or receiving) the checkpoint data + * \param[in] size Size, in bytes, of the buffer + * \param[out] actualSize Actual number of bytes of checkpoint data + * \param[out] basePtr Base pointer filled in by the kernel + * \return Error indication + * \retval 0 Success + * \retval -1 Invalid parameters + */ +__INLINE__ int Kernel_checkpoint(int component, int operation, void *buffer, uint32_t size, uint32_t *actualSize, uint32_t*basePtr) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "mr 8,%7;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_CHECKPOINT), + "r" (component), + "r" (operation), + "r" (buffer), + "r" (size), + "r" (actualSize), + "r" (basePtr) + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "cc", "memory" ); + + return( rc ); +} + +/*! \brief Returns the contents of the running PPC450's processor version register. + * \return Contents of PPC450 PVR register + */ +__INLINE__ int Kernel_GetProcessorVersion() +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "sc;" + "mr %0, 3;" + + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_GET_PERSONALITY) /* NOTE(review): same syscall number as Kernel_GetPersonality -- looks like a copy/paste; confirm the intended syscall number for reading the PVR */ + : "r0", "r3", "cc", "memory" ); + + return( rc ); +} + +/*! \brief LockBox allocate syscall definition + * \param[in] lockid Indicates which counter ID is to be obtained. Counter IDs vary from 0-1023 + * \param[in] numlocks The number of sequential counter IDs that will be obtained + * \param[out] ptr An array of pointers that will be filled in with the counter virtual addresses. 
+ * \param[in] flags Optional flags + * \warning The storage indicated by ptr must be large enough to hold numlocks*sizeof(uint32_t) bytes + * \internal This is an internal syscall - do not use. + * \see LockBox_AllocateCounter + * \see LockBox_AllocateMutex + * \see LockBox_AllocateBarrier + */ +/* __INLINE__ int Kernel_AllocateLockBox(uint32_t lockid, uint32_t numlocks, uint32_t** ptr, uint32_t flags) */ +/* { */ +/* int rc = 0; // this syscall returns RC in r3 and does not use errno */ +/* */ +/* asm __volatile__ ("li 0,%1;" */ +/* "mr 3,%2;" */ +/* "mr 4,%3;" */ +/* "mr 5,%4;" */ +/* "mr 6,%5;" */ +/* / "sc;" */ +/* "mr %0, 3;" */ +/* : "=&r" (rc) // early clobber */ +/* : "i" (_BGP_SYSCALL_NR_ALLOC_COUNTER), */ +/* "r" (lockid), */ +/* "r" (numlocks), */ +/* "r" (ptr), */ +/* "r" (flags) */ +/* / : "r0", "r3", "r4", "r5", "r6", "cc", "memory" ); */ +/* */ +/* return( rc ); */ +/* } */ + +/*! \brief Converts a Rank into a XYZT Coordinate + * + * \param[in] rank Rank for the node + * \param[out] xcoord X Coordinate for the specified node + * \param[out] ycoord Y Coordinate for the specified node + * \param[out] zcoord Z Coordinate for the specified node + * \param[out] tcoord T Coordinate for the specified node + * \return Error status + * \retval 0 Success + * \retval non-zero Error + */ +__INLINE__ int Kernel_Rank2Coord(uint32_t rank, uint32_t* xcoord, uint32_t* ycoord, uint32_t* zcoord, uint32_t* tcoord) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_RANK2COORD), + "r" (rank), + "r" (xcoord), + "r" (ycoord), + "r" (zcoord), + "r" (tcoord) + : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" ); + + return( rc ); +} + +/*! \brief Converts all ranks into a XYZT Coordinate + * + * \param[out] coordinates XYZT coordinates of all nodes. The array is in + * rank order. 
If a rank is not mapped, its coordinates will be + * (255,255,255,255). + * \param[in] len specifies the length of the coordinates array. That is, + * the caller is guaranteeing that there is storage for coordinates[0], + * coordinates[1], ..., coordinates[len-1]. + * \return Error status + * \retval 0 Success + * \retval non-zero Error + */ + +typedef struct _Kernel_Coordinates { + unsigned char x; + unsigned char y; + unsigned char z; + unsigned char t; +} kernel_coords_t; + +__INLINE__ int Kernel_Ranks2Coords(kernel_coords_t* coordinates, uint32_t len) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_RANKS2COORDS), + "r" (coordinates), + "r" (len) + : "r0", "r3", "r4", "cc", "memory" ); + + return( rc ); +} + +/*! \brief Convert a XYZT Coordinate into a Rank. Also returns number of nodes + * \param[in] xcoord X Coordinate used to specify the desired node + * \param[in] ycoord Y Coordinate used to specify the desired node + * \param[in] zcoord Z Coordinate used to specify the desired node + * \param[in] tcoord T Coordinate used to specify the desired node + * \param[out] rank Rank of the desired node + * \param[out] numnodes Number of Nodes in the partition + * \return Error indication + * \retval 0 Success + * \retval non-zero Error + */ + +__INLINE__ int Kernel_Coord2Rank(uint32_t xcoord, uint32_t ycoord, uint32_t zcoord, uint32_t tcoord, uint32_t* rank, uint32_t* numnodes) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "mr 8,%7;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_COORD2RANK), + "r" (xcoord), + "r" (ycoord), + "r" (zcoord), + "r" (tcoord), + "r" (rank), + "r" (numnodes) + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "cc", 
"memory" ); + + return( rc ); +} + +/*! \brief Returns the Job ID + * \return Contains the control system JobID + */ +__INLINE__ uint32_t Kernel_GetJobID() +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_GETJOBID) + : "r0", "r3", "cc", "memory" ); + + return( rc ); +} + +/*! \brief Read from a privileged DCR + * \param[in] dcrid Number of the DCR register + * \param[out] value Contents of DCR register + * \return Error indication + * \retval 0 Success + * \retval -1 Invalid DCR + * \note Only selected previleged DCRs will be accessible via this system call. + */ +__INLINE__ uint32_t Kernel_ReadDCR(uint32_t dcrid, uint32_t* value) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_READDCR), + "r" (dcrid), + "r" (value) + : "r0", "r3", "r4", "cc", "memory" ); + + return( rc ); +} + +/*! \brief Write to a privileged DCR + * \param[in] dcrid Number of the DCR register + * \param[in] value Contents of DCR register + * \return Error indication + * \retval 0 Success + * \retval -1 Invalid DCR + * \note Only selected previleged DCRs will be accessible via this system call. + */ +__INLINE__ uint32_t Kernel_WriteDCR(uint32_t dcrid, uint32_t value) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_WRITEDCR), + "r" (dcrid), + "r" (value) + : "r0", "r3", "r4", "cc", "memory" ); + + return( rc ); +} + +/*! + * \brief Query Free DMA Counter Subgroups within a Group + * + * This function is a wrapper around a system call that returns a list of the + * free (available) subgroups within the specified group. 
+ * + * \param[in] type Specifies whether this is an injection or + * reception counter group (DMA_Type_Injection + * or DMA_Type_Reception) + * \param[in] grp Group number being queried (0 to + * DMA_NUM_COUNTER_GROUPS-1) + * \param[out] num_subgroups Pointer to an int where the number of free + * subgroups in the specified group is returned + * \param[out] subgroups Pointer to an array of num_subgroups ints where + * the list of num_subgroups subgroups is returned. + * Each int is the subgroup number + * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). The + * caller must provide space for + * DMA_NUM_COUNTERS_PER_SUBGROUP ints, in case the + * entire counter group is free. + * + * \retval 0 Successful. num_subgroups and subgroups array set as described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \internal This function is not intended to be called directly + * \see DMA_CounterGroupQueryFree() + * \note The kernel may need to synchronize with other cores performing + * allocate or free syscalls. + * + */ +__INLINE__ uint32_t Kernel_CounterGroupQueryFree(uint32_t type, uint32_t group, uint32_t* num_subgroups, uint32_t* subgroups) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_COUNTERGRPQUERYFREE), + "r" (type), + "r" (group), + "r" (num_subgroups), + "r" (subgroups) + : "r0", "r3", "r4", "r5", "r6", "cc", "memory" ); + + return( rc ); +} + +/*! + * \brief Allocate DMA Counters From A Group + * + * This function is a wrapper around a system call that allocates DMA counters + * from the specified group. Counters may be allocated in subgroups of + * DMA_NUM_COUNTERS_PER_SUBGROUP counters. Parameters specify how interrupts, + * generated when a counter hits zero, are to be handled. 
A + * DMA_CounterGroup_t structure is returned for use in other inline + * functions to operate on the allocated counters. + * + * \param[in] type Specifies whether this is an injection or + * reception counter group (DMA_Type_Injection + * or DMA_Type_Reception) + * \param[in] grp Group number whose counters are being allocated + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] num_subgroups Number of subgroups to be allocated from the group + * (1 to DMA_NUM_COUNTERS_PER_SUBGROUP) + * \param[in] subgroups Pointer to an array of num_subgroups ints where + * the list of subgroups to be allocated is provided. + * Each int is the subgroup number + * (0 to num_subgroups-1). + * \param[in] target The core that will receive the interrupt when a + * counter in this allocation hits zero + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] handler A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * counter in this allocation hits zero. This + * function must be coded to take 4 uint32_t + * parameters: + * - A pointer to storage specific to this + * handler. This is the handler_parm + * specified on this allocation function. + * - Three unint32_t parameters that are not used. + * If handler is NULL, hit-zero interrupts will not + * be enabled for these counters. + * \param[in] handler_parm A pointer to storage that should be passed to the + * interrupt handling function (see handler + * parameter) + * \param[in] interruptGroup A InterruptGroup_t that identifies the + * group of interrupts that the counters being + * allocated will become part of. + * \param[out] cg_ptr Pointer to a structure that is filled in upon + * successful return for use in other inline + * functions to operate on the allocated counters. + * \li counter - Array of software counter + * structures. Each element + * points to the corresponding + * hardware counter in DMA SRAM. + * Pointers are null if not + * allocated). 
+ * Counters are initialized to + * DMA_COUNTER_INIT_VAL, + * disabled, their hit_zero bit + * is off, base and max are NULL. + * \li status_ptr - Points to status area within the + * DMA memory map. + * \li permissions - Bits set for each allocated + * counter + * \li grp_permissions - Permissions for each + * subgroup + * \li group_id - The group number + * \li type - The type of DMA (injection or + * reception) + * + * \retval 0 Successful. Counters allocated and cg_ptr structure filled in as + * described. + * \retval -1 Unsuccessful. errno gives the reason. Nothing has been + * allocated. + * + * \internal This function is not intended to be called directly + * \see DMA_CounterGroupAllocate() + * \note The kernel may need to synchronize with other cores performing queries + * or frees. + * + */ +__INLINE__ uint32_t Kernel_CounterGroupAllocate(uint32_t type, uint32_t group, uint32_t num_subgroups, uint32_t* subgroups, uint32_t target, uint32_t handler, uint32_t* handler_parm, uint32_t interruptGroup, uint32_t* cg_ptr) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "mr 8,%7;" + "mr 9,%8;" + "mr 10,%9;" + "mr 11,%10;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_COUNTERGRPALLOCATE), + "r" (type), + "r" (group), + "r" (num_subgroups), + "r" (subgroups), + "r" (target), + "r" (handler), + "r" (handler_parm), + "r" (interruptGroup), + "r" (cg_ptr) + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc", "memory" ); + + return( rc ); +} + +/*! + * \brief Free DMA Counters From A Group + * + * This function is a wrapper around a system call that frees DMA counters + * from the specified group. Counters may be freed in subgroups of + * DMA_NUM_COUNTERS_PER_SUBGROUP counters. 
+ * + * \param[in] grp Group number whose counters are being freed + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] num_subgroups Number of subgroups to be freed from the group + * (1-DMA_NUM_COUNTERS_PER_SUBGROUP) + * \param[in] subgroups Pointer to an array of num_subgroups ints where + * the list of subgroups to be freed is provided. + * Each int is the subgroup number + * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). + * \param[out] cg_ptr Pointer to the structure previously filled in when + * these counters were allocated. Upon successful + * return, this structure is updated to reflect the + * freed counters: + * \li counter[] - Counter structures Pointers to + * freed counters nulled. + * \li permissions - Bits cleared for each freed + * counter. + * + * \retval 0 Successful. Counters freed and cg_ptr structure updated as + * described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \internal This function is not intended to be called directly + * \see DMA_CounterGroupFree() + * \note The kernel may need to synchronize with other cores performing allocates + * or queries. + */ +__INLINE__ uint32_t Kernel_CounterGroupFree(uint32_t group, uint32_t num_subgroups, uint32_t* subgroups, uint32_t* cg_ptr) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_COUNTERGRPFREE), + "r" (group), + "r" (num_subgroups), + "r" (subgroups), + "r" (cg_ptr) + : "r0", "r3", "r4", "r5", "r6", "cc", "memory" ); + + return( rc ); +} + + +/*! + * \brief Query Free DMA InjFifos within a Group + * + * This function is a wrapper around a system call that returns a list of the + * free (available to be allocated) fifos within the specified group. 
+ * + * \param[in] grp Group number being queried + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1) + * \param[out] num_fifos Pointer to an int where the number of free + * fifos in the specified group is returned + * \param[out] fifo_ids Pointer to an array of num_fifos ints where + * the list of free fifos is returned. + * Each int is the fifo number + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * The caller must provide space for + * DMA_NUM_INJ_FIFOS_PER_GROUP ints, + * in case the entire fifo group is free. + * + * \retval 0 Successful. num_fifos and fifo_ids array set as described. + * \retval -1 Unsuccessful. errno gives the reason. + * \internal This function is not intended to be called directly + * \see DMA_InjFifoGroupQueryFree() + */ +__INLINE__ uint32_t Kernel_InjFifoGroupQueryFree(uint32_t group, uint32_t* num_fifos, uint32_t* fifo_ids) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_INJFIFOGRPQUERYFREE), + "r" (group), + "r" (num_fifos), + "r" (fifo_ids) + : "r0", "r3", "r4", "r5", "cc", "memory" ); + + return( rc ); +} + +/*! + * \brief Allocate DMA InjFifos From A Group + * + * This function is a wrapper around a system call that allocates specified + * DMA injection fifos from the specified group. Parameters specify whether + * each fifo is high or normal priority, local or non-local, and which torus + * fifos it maps to. A DMA_InjFifoGroup_t structure is returned for + * use in other inline functions to operate on the allocated fifos. + * + * Refer to the interrupt discussion at the top of this include file to see why + * there are no interrupt-related parameters. 
+ * + * \param[in] grp Group number whose DMA injection fifos are being + * allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1) + * \param[in] num_fifos Number of fifos to be allocated from the group + * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP) + * \param[in] fifo_ids Pointer to an array of num_fifos ints where + * the list of fifos to be allocated is provided. + * Each int is the fifo number (0 to num_fifos-1). + * \param[in] priorities Pointer to an array of num_fifos short ints where + * the list of priorities to be assigned to the fifos + * is provided. Each short int indicates the priority + * to be assigned to each of the fifos identified in + * the fifo_ids array (0 is normal, 1 is high priority). + * \param[in] locals Pointer to an array of num_fifos short ints where + * an indication is provided of whether each fifo will + * be used for local transfers (within the same node) + * or torus transfers. Each short int indicates the + * local/non-local attribute to be assigned to each of + * the fifos identified in the fifo_ids array (0 is + * non-local, 1 is local). If 0, the corresponding + * array element in ts_inj_maps indicates which torus + * fifos can be injected. + * \param[in] ts_inj_maps Pointer to an array of num_fifos short ints where + * the torus fifos that can be injected are specified + * for each fifo. Each short int specifies which of + * the 8 torus injection fifos can be injected when a + * descriptor is injected into the DMA injection fifo. + * Must be non-zero when the corresponding "locals" + * is 0. + * \param[out] fg_ptr Pointer to a structure that is filled in upon + * successful return for use in other inline functions + * to operate on the allocated fifos. + * \li fifos - Array of fifo structures. Structures + * for allocated fifos are initialized as + * documented below. Structures for + * fifos not allocated by this instance of + * this syscall are initialized to binary + * zeros. Allocated fifos are enabled. 
+ * \li status_ptr - Points to status area within the + * DMA memory map. + * \li permissions - Bits indicating which fifos were + * allocated during this syscall. + * \li group_id - The id of this group. + * + * \retval 0 Successful. Fifos allocated and fg_ptr structure filled in as + * described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \internal This function is not intended to be called directly + * \see DMA_InjFifoGroupAllocate() + * \return The group fifo structure pointed to by fg_ptr is completely + * initialized as follows: + * - status_ptr points to the appropriate fifo group DMA memory map + * - fifo structures array. Fifo structures for fifos not allocated + * during this syscall are initialized to binary zeros. Fifo + * structures for fifos allocated during this syscall are initialized: + * - fifo_hw_ptr points to the DMA memory map for this fifo. The + * hardware start, end, head, and tail are set to zero by the + * kernel. + * - All other fields in the structure are set to zero by the kernel + * except priority, local, and ts_inj_map are set to reflect what + * was requested in the priorities, locals, and ts_inj_maps + * syscall parameters. + * + */ +__INLINE__ uint32_t Kernel_InjFifoGroupAllocate(uint32_t group, uint32_t num_fifos, uint32_t* fifo_ids, uint16_t* priorities, uint16_t* locals, uint8_t* ts_inj_maps, uint32_t* fg_ptr) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "mr 8,%7;" + "mr 9,%8;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_INJFIFOGRPALLOCATE), + "r" (group), + "r" (num_fifos), + "r" (fifo_ids), + "r" (priorities), + "r" (locals), + "r" (ts_inj_maps), + "r" (fg_ptr) + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" ); + + return( rc ); +} + +/*! 
+ * \brief Free DMA InjFifos From A Group + * + * This function is a wrapper around a system call that frees DMA injection + * counters from the specified group. + * + * \param[in] grp Group number whose DMA injection fifos are being + * freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1) + * \param[in] num_fifos Number of fifos to be freed from the group + * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP) + * \param[in] fifo_ids Pointer to an array of num_fifos ints where + * the list of fifos to be freed is provided. + * Each int is the fifo number (0 to num_fifos-1). + * \param[in] fg_ptr Pointer to the structure previously filled in when + * these fifos were allocated. Upon successful + * return, this structure is updated to reflect the + * freed fifos: + * \li fifos - Structures for freed fifos zero'd. + * Freed fifos are disabled. + * \li permissions - Bits cleared for each freed fifo. + * + * \retval 0 Successful. Fifos freed and fg_ptr structure updated as described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \internal This function is not intended to be called directly + * \see DMA_InjFifoGroupFree() + * \note This is a fatal error if any of the fifos are non empty and activated + * + */ +__INLINE__ uint32_t Kernel_InjFifoGroupFree(uint32_t group, uint32_t num_fifos, uint32_t* fifo_ids, uint32_t* fg_ptr) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_INJFIFOGRPFREE), + "r" (group), + "r" (num_fifos), + "r" (fifo_ids), + "r" (fg_ptr) + : "r0", "r3", "r4", "r5", "r6", "cc", "memory" ); + + return( rc ); +} + +/*! + * \brief DMA InjFifo Initialization By Id + * + * - For an allocated injection DMA fifo, initialize its start, head, tail, and + * end. + * - Compute fifo size and free space. + * - Initialize wrap count. + * - Activate the fifo. 
+ * + * \param[in] fg_ptr Pointer to fifo group structure. + * \param[in] fifo_id Id of the fifo to be initialized + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * \param[in] va_start Virtual address of the start of the fifo. + * \param[in] va_head Virtual address of the head of the fifo (typically + * equal to va_start). + * \param[in] va_end Virtual address of the end of the fifo. + * + * \retval 0 Successful. + * \retval -1 Unsuccessful. Error checks include + * - va_start < va_end + * - va_start <= va_head <= + * (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) + * - va_start and va_end are 32-byte aligned + * - fifo_size is larger than (DMA_MIN_INJECT_SIZE_IN_QUADS + + * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) + * + */ +__INLINE__ uint32_t Kernel_InjFifoInitById(uint32_t* fg_ptr, + int fifo_id, + uint32_t* va_start, + uint32_t* va_head, + uint32_t* va_end) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_INJFIFOINITID), + "r" (fg_ptr), + "r" (fifo_id), + "r" (va_start), + "r" (va_head), + "r" (va_end) + : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" ); + + return( rc ); +} + + +/*! + * \brief Set DMA Reception Fifo Map + * + * This function is a wrapper around a system call that + * - Sets DCRs establishing the map between the hardware torus fifos and the + * DMA reception fifos that are to receive the packets from those hardware + * torus fifos. + * - Sets DCRs establishing the DMA reception fifos that are to receive + * local transfer packets. + * - Sets the DCRs establishing the type (0 or 1) of each reception fifo. + * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos. + * - Leaves all of the fifos that are used in a "disabled" state. + * DMA_RecFifoInitById() initializes and enables the fifos. 
+ * + * \param[in] rec_map Reception Fifo Map structure, defining the mapping. + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + * \internal This is an internal syscall + * \see DMA_RecFifoSetMap + * \note This function should be called once per job, after DMA_ResetRelease(). + * It may be called by any core, but once a core has called it, other + * calls by that same core or any other core will fail. + * + * \note During job init, the kernel sets up the DCR clear masks for each + * reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear + * a fifo in group g only clears group g. + * + */ +__INLINE__ int Kernel_RecFifoSetMap(uint32_t* rec_map) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_RECFIFOSETMAP), + "r" (rec_map) + : "r0", "r3", "cc", "memory" ); + return( rc ); +} + +/*! + * \brief Get DMA Reception Fifo Map + * + * This function is a wrapper around a system call that returns a DMA + * reception fifo map structure, filled in according to the DCRs. + * + * \param[in,out] rec_map A pointer to a Reception Fifo Map structure + * that will be filled-in upon return. + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + */ +__INLINE__ int Kernel_RecFifoGetMap(uint32_t* rec_map) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_RECFIFOGETMAP), + "r" (rec_map) + : "r0", "r3", "cc", "memory" ); + return( rc ); +} + +/*! + * \brief Get DMA Reception Fifo Group + * + * This is a wrapper around a System Call. 
This function returns THE + * one-and-only pointer to the fifo group structure, with the entries all + * filled in from info in the DCRs. If called multiple times with the same + * group, it will always return the same pointer, and the system call will + * not be invoked again. + * + * It must be called AFTER DMA_RecFifoSetMap(). + * + * By convention, the same "target" is used for normal and header fifo + * interrupts (could be changed). In addition, by convention, interrupts for + * fifos in group g come out of the DMA as non-fatal irq bit 28+g, + * ie, only fifos in group g can cause the "type g" threshold interrupts. + * + * \param[in] grp The group number (0 through DMA_NUM_REC_FIFO_GROUPS). + * \param[in] target The core that will receive the interrupt when a + * fifo in this group reaches its threshold + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * Ignored on subsequent call with the same group. + * \param[in] normal_handler A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * normal fifo in this group reaches its threshold. + * This function must be coded to take 4 uint32_t + * parameters: + * - A pointer to storage specific to this + * handler. This is the normal_handler_parm + * specified on this function call. + * - 3 uint32_t parameters that are not used. + * If normal_handler is NULL, threshold interrupts + * are not delivered for normal fifos in this group. + * Ignored on subsequent call with the same group. + * \param[in] normal_handler_parm A pointer to storage that should be passed + * to the normal interrupt handling function + * (see normal_handler parameter). + * Ignored on subsequent call with the same + * group. + * \param[in] header_handler ** This parameter is deprecated. Specify NULL.** + * A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * header fifo in this group reaches its threshold. 
+ * This function must be coded to take 2 parameters: + * void* A pointer to storage specific to this + * handler. This is the header_handler_parm + * specified on this function call. + * int The global fifo ID of the fifo that hit + * its threshold (0 through + * NUM_DMA_REC_FIFOS-1). + * If header_handler is NULL, threshold interrupts + * are not delivered for header fifos in this group. + * Ignored on subsequent call with the same group. + * \param[in] header_handler_parm ** This parameter is deprecated. Specify + * NULL. ** + * A pointer to storage that should be passed + * to the header interrupt handling function + * (see header_handler parameter). + * Ignored on subsequent call with the same + * group. + * \param[in] interruptGroup A InterruptGroup_t that identifies the + * group of interrupts that the fifos in this group + * will become part of. + * Ignored on subsequent call with the same group. + * + * \return RecFifoGroupStruct Pointer to a DMA Reception Fifo Group structure + * that reflects the fifos that are being used in + * this group. This same structure is shared by + * all users of this reception fifo group. + * NULL is returned if an error occurs. + * + * \note The following comments from Phil about the internals of the syscall: + * - error checks + * - 0 <= group_id < 4 + * - the start of the fifo group is a valid virtual address (tlb mapped)? 
+ * - disable the rDMA + * - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information + * - loop through the map to determine how many and which fifos in this group + * are used, including headers + * - filling in the addresses of used fifos + * - In particular, any pointer to any fifo in the group that is not used + * will have a null pointer + * - furthermore, + * - write starting values to all used fifos + * - make sure all interrupts are cleared + * - enable rDMA + * + */ +__INLINE__ int Kernel_RecFifoGetFifoGroup( + uint32_t* fifogroup, + int grp, + int target, + Kernel_CommThreadHandler normal_handler, + void *normal_handler_parm, + Kernel_CommThreadHandler header_handler, + void *header_handler_parm, + Kernel_InterruptGroup_t interruptGroup + ) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "mr 8,%7;" + "mr 9,%8;" + "mr 10,%9;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_RECGETFIFOGROUP), + "r" (fifogroup), + "r" (grp), + "r" (target), + "r" (normal_handler), + "r" (normal_handler_parm), + "r" (header_handler), + "r" (header_handler_parm), + "r" (interruptGroup) + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" ); + + return( rc ); +} + +/*! + * \brief DMA RecFifo Initialization By Id + * + * - For a DMA reception fifo, initialize its start, head, tail, and end. + * - Compute fifo size and free space. + * + * \param[in] fg_ptr Pointer to fifo group structure. + * \param[in] fifo_id Id of the fifo to be initialized + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * \param[in] va_start Virtual address of the start of the fifo. + * \param[in] va_head Virtual address of the head of the fifo (typically + * equal to va_start). + * \param[in] va_end Virtual address of the end of the fifo. + * + * \retval 0 Successful. + * \retval -1 Unsuccessful. 
Error checks include + * - va_start < va_end + * - va_start <= va_head < va_end + * - va_start and va_end are 32-byte aligned + * - fifo_size >= DMA_MIN_REC_FIFO_SIZE_IN_BYTES + * + */ +__INLINE__ int Kernel_RecFifoInitById( + uint32_t* fg_ptr, + int fifo_id, + void *va_start, + void *va_head, + void *va_end + ) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_RECFIFOINITID), + "r" (fg_ptr), + "r" (fifo_id), + "r" (va_start), + "r" (va_head), + "r" (va_end) + : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" ); + + return( rc ); +} + + /*! + * \brief Injects a binary (RAW) RAS message to the control system + * + * Ships a RAS message of the given facility, unit, errcode, and packed data to the control system. No checking is done on the + * correctness of the data. Can be used to simulate a RAS message for testing purposes. + * + * \param[in] facility High level component detecting the condition. (e.g., _bgp_fac_kernel, _bgp_fac_application, _bgp_fac_diags) + * \param[in] unit Unit generating the RAS event. (e.g., _bgp_unit_ppc450, _bgp_unit_snoop) + * \param[in] err_code Error code for RAS event (e.g., _bgp_err_ppc450_l1d_dpe0) + * \param[in] numwords Number of 32-bit integers in the packed binary array + * \param[in] array Pointer to the array of packed binary data. + * + * Restriction. There is currently a limit of eight 32-bit words of packed binary data. + * + * \internal This function is intended for testing purposes only. It should not be used in a production system as it could introduce false RAS messages. 
+ */ + +__INLINE__ int Kernel_InjectRAWRAS( + _BGP_Facility facility, + _BGP_RAS_Units unit, + uint16_t err_code, + int numwords, + const uint32_t* array) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_RAWRASINJECT), + "r" (facility), + "r" (unit), + "r" (err_code), + "r" (numwords), + "r" (array) + : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" ); + + return( rc ); +} + + /*! + * \brief Injects a ASCII (Textual) RAS message to the control system + * + * Ships a RAS message of the given facility, unit, errcode, and an ASCII string to the control system. No checking is done on the + * correctness of the facility or unit. Can be used to simulate a RAS message for testing purposes. + * + * \param[in] facility High level component detecting the condition. (e.g., _bgp_fac_kernel, _bgp_fac_application, _bgp_fac_diags) + * \param[in] unit Unit generating the RAS event. (e.g., _bgp_unit_ppc450, _bgp_unit_snoop) + * \param[in] err_code Error code for RAS event (e.g., _bgp_err_ppc450_l1d_dpe0) + * \param[in] text Pointer to a string of null-terminated ASCII characters + * + * \internal This function is intended for testing purposes only. It should not be used in a production system as it could introduce false RAS messages. + */ +__INLINE__ int Kernel_InjectASCIIRAS( + _BGP_Facility facility, + _BGP_RAS_Units unit, + uint16_t err_code, + const uint8_t* text) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_ASCIIRASINJECT), + "r" (facility), + "r" (unit), + "r" (err_code), + "r" (text) + : "r0", "r3", "r4", "r5", "r6", "cc", "memory" ); + + return( rc ); +} + + + +/*! 
+ * \brief Enables/Disables the counter overflow/underflow interrupts + * + * This function is a wrapper around a system call that can enable or disable the 4 counter overflow/underflow interrupts + * + * \param[in] enable/disable boolean + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + */ +__INLINE__ int Kernel_ChgCounterInterruptEnables(uint32_t enable) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_CHGDMACTRINTERRUPT), + "r" (enable) + : "r0", "r3", "cc", "memory" ); + return( rc ); +} + + +/*! + * \brief Clears the Full Reception FIFO (DD1 workaround) + * + * This function exists to reset the DMA reception fifos - it is a workaround for DD1 only. It should not be needed in DD2. + * + * NOTE: the implementation has been removed. + */ +__INLINE__ int Kernel_ClearFullReceptionFifo() +{ + return 0; +} + +#include <spi/lockbox_interface.h> + +#if ((!defined(__CNK__)) && (!defined(__BL__))) +/*! \brief Creates a pthread with a commthread attribute + * + * \note CNK restriction: 1 CommThread per core is allowed + * \note In Dual or VNM, each process must allocate its own commthreads + * \note CommThreads are pinned per core. 
(e.g., in SMP mode, this SPI must be called 4 times to create enough CommThreads for each processor) + * \warning non-portable pthread API + * \param[in] thread pthread_t structure + * \param[in] attr pthread_attr_t structure + * \param[in] start_routine function pointer of the thread's main() + * \param[in] arg 1st argument to the pthread + * \return Error condition from pthread_create + * \retval 0 success + * \retval -1 error, check errno + */ +__INLINE__ int pthread_create_CommThread_np( pthread_t *thread, + pthread_attr_t *attr, + void *(*start_routine)(void *), + void *arg ) +{ + uint32_t usprg0 = _bgp_mfspr( SPRN_USPRG0 ); /* save orig usprg0 */ + + _bgp_mtspr( SPRN_USPRG0, _BGP_COMMTHREAD_MAGIC ); + + int rc = pthread_create( thread, attr, start_routine, arg ); + _bgp_mtspr( SPRN_USPRG0, usprg0 ); /* restore orig usprg0 */ + + return( rc ); +} +#endif + +/*! \brief Causes a commthread to disappear from the runqueue + * + * \note Kernel does not guarantee that the instruction pointer, stack pointer, and register state are preserved across a poof. + * \note TLS data is preserved across a poof + * \note This SPI is only executable on a comm. thread. + * \warning non-portable pthread API + * \return error indication + * \retval success Does not return. Thread has "poofed" + * \retval -1 Calling thread is not a CommThread, so cannot poof + */ +__INLINE__ int pthread_poof_np( void ) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_PTHREAD_POOF) + : "r0", "r3", "cc", "memory" ); + + return( rc ); +} + + + +/*! 
\defgroup COMMTHRD_OPCODES CommThread Opcodes + * \{ + * \note Only 1 interrupt route can be specified per opcode + * \note CallFunc, DisableIntOnEntry, EnableIntOnPoof can be specified in any combination + * \note Current support requires that DisableIntOnEntry and EnableIntOnPoof be specified + */ +#define COMMTHRD_OPCODE_DISABLE 0x00 /* !< Interrupt route - Not routed / interrupt disabled */ +#define COMMTHRD_OPCODE_CORE0 0x01 /* !< Interrupt route - Dispatched on core0 */ +#define COMMTHRD_OPCODE_CORE1 0x02 /* !< Interrupt route - Dispatched on core1 */ +#define COMMTHRD_OPCODE_CORE2 0x03 /* !< Interrupt route - Dispatched on core2 */ +#define COMMTHRD_OPCODE_CORE3 0x04 /* !< Interrupt route - Dispatched on core3 */ +#define COMMTHRD_OPCODE_BCAST 0x05 /* !< Interrupt route - Dispatched on all cores */ +#define COMMTHRD_OPCODE_ROUTEMASK 0x0F /* !< Interrupt route mask */ +#define COMMTHRD_OPCODE_CALLFUNC 0x10 /* !< The provided function will be called on the comm. thread */ +#define COMMTHRD_OPCODE_DISABLEINTONENTRY 0x20 /* !< Interrupts using cntrid will be disabled when comm. thread is invoked */ +#define COMMTHRD_OPCODE_ENABLEINTONPOOF 0x40 /* !< Interrupts using cntrid will be enabled when comm. thread poofs */ +/*! + * \} + */ + +/*! \brief Generates an InterruptID value + * \param[in] group group of the interrupt. range 0-9. + * \param[in] irq_in_group irq within the group. range 0-31. + * \return Composite value able to be passed to Kernel_SetCommThreadConfig + * \see Kernel_SetCommThreadConfig + */ +#define Kernel_MkInterruptID(group, irq_in_group) ((group<<5)|(irq_in_group&0x1f)) + +/*! + * \brief Sets kernel data structures needed to dispatch a communications thread + * + * Each interrupt on BGP can be used to launch a communications thread. Since access to the + * interrupt controller is privileged, the function exposes some interrupt control to the + * user application. 
+ * \pre Counter must have been allocated via the LockBox_AllocateCounter() routine. + * \pre It is recommended that Kernel_DisableInterruptClass() be called twice on the counter + * to ensure that the interrupt is disabled until all interrupts for the counter + * have been configured. + * \pre All + * \post After the last call to Kernel_SetCommThreadConfig for the counter, invoke + * Kernel_EnableInterruptClass() and Kernel_HardEnableInterruptClass() on + * that counter to enable the interrupts for that class. + * \see LockBox_AllocateCounter + * \see Kernel_DisableInterruptClass + * \see Kernel_EnableInterruptClass + * \see Kernel_HardEnableInterruptClass + * \note An interrupt can only belong to 1 interrupt class (a.k.a., lockbox counter) + * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings. + * \note Kernel may prevent changing interrupt settings for certain InterruptID values. + * \note If an interrupt fires on a core without a comm. thread, results are not guaranteed. + * \return Completion status of the command. + * \retval 0 no error occurred + * \retval EINVAL invalid parameter + * \param[in] InterruptID Identifies a unique interrupt line. \see Kernel_MkInterruptID + * \param[in] opcode Specifies what operation to perform when the interrupt occurs. 
Valid \ref COMMTHRD_OPCODES + * \param[in] cntrid ID of the lockbox counter used for interrupt enable/disable control + * \param[in] funcptr Function pointer that will be invoked when the interrupt fires + * \param[in] arg1 1st argument to the funcptr when the interrupt fires + * \param[in] arg2 2nd argument to the funcptr when the interrupt fires + * \param[in] arg3 3rd argument to the funcptr when the interrupt fires + * + */ +__INLINE__ int Kernel_SetCommThreadConfig(int InterruptID, int opcode, LockBox_Counter_t cntrid, + Kernel_CommThreadHandler funcptr, + uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4) +{ + int rc = 0; + asm __volatile__ ("li 0,%1;" + "mr 3, %2;" + "mr 4, %3;" + "mr 5, %4;" + "mr 6, %5;" + "mr 7, %6;" + "mr 8, %7;" + "mr 9, %8;" + "mr 10, %9;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_SETCOMMTHREADCONFIG), + "r" (InterruptID), + "r" (opcode), + "r" (cntrid), + "r" (funcptr), + "r" (arg1), + "r" (arg2), + "r" (arg3), + "r" (arg4) + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" ); + return rc; +} + +/*! + * \brief Returns the kernel data structures that were specified to dispatch communication thread + * + * Each interrupt on BGP can be used to launch a communications thread. Since access to the + * interrupt controller is privileged, the function exposes some interrupt control to the + * user application. + * + * \param[in] InterruptID Identifies a unique interrupt line. + * \param[out] opcode Storage for opcode value. Specifies which core receives the interrupt. It also controls whether the interrupt disables a class of interrupts. 
Valid \ref COMMTHRD_OPCODES + * \param[out] cntrid Storage for ID of the lockbox counter used for interrupt enable/disable control + * \param[out] funcptr Storage for Function pointer that will be invoked when the interrupt fires + * \param[out] arg1 Storage for 1st argument to the funcptr when the interrupt fires + * \param[out] arg2 Storage for 2nd argument to the funcptr when the interrupt fires + * \param[out] arg3 Storage for 3rd argument to the funcptr when the interrupt fires + * \return Completion status of the command. + * \retval 0 no error occurred + * \retval EINVAL invalid parameter + * + */ +__INLINE__ int Kernel_GetCommThreadConfig(int InterruptID, int* opcode, LockBox_Counter_t* cntrid, + Kernel_CommThreadHandler* funcptr, + uint32_t* arg1, uint32_t* arg2, uint32_t* arg3, uint32_t* arg4) +{ + int rc = 0; + asm __volatile__ ("li 0,%1;" + "mr 3, %2;" + "mr 4, %3;" + "mr 5, %4;" + "mr 6, %5;" + "mr 7, %6;" + "mr 8, %7;" + "mr 9, %8;" + "mr 10, %9;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_GETCOMMTHREADCONFIG), + "r" (InterruptID), + "r" (opcode), + "r" (cntrid), + "r" (funcptr), + "r" (arg1), + "r" (arg2), + "r" (arg3), + "r" (arg4) + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" ); + return rc; +} + +/*! \brief Flush interrupt enable/disable state + * + * For each interrupt that has a lockbox counter associated with it, this SPI will + * update the interrupt controller to match the state specified by the lockbox counter. + * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings. + * \note Kernel is responsible for updating the interrupt controller to match all lockbox counters + * + * \return Completion status of the command. 
+ * \retval 0 no error occurred + */ +__INLINE__ int Kernel_FlushInterruptState() +{ + int rc; + asm __volatile__ ("li 0,%1;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_FLUSHINTSTATE) + : "r0", "r3", "cc", "memory" ); + return rc; +} + +/*! \brief Indicates that the kernel should disable the interrupt + * + * Updates the interrupt class's lockbox to indicate that the kernel should disable the interrupt. + * Kernel will disable the interrupt at its leisure, but it should ensure that no communications thread + * is invoked for that interrupt class. + * + * The lockbox values have the following meanings: + * 0: Interrupts for this classid are enabled + * 1: Interrupts for this classid are logically disabled. + * If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt. + * 2: Interrupts for this classid are hard-disabled. The interrupt will not disturb the core. + * + * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings. + * \note Do not disable an already disabled interrupt class. + * \note A disabled interrupt class is disabled for all 4 cores, regardless of mode. + * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines + * + */ +__INLINE__ uint32_t Kernel_DisableInterruptClass(LockBox_Counter_t classid) +{ + return ( LockBox_FetchAndInc(classid) ); +} + +/*! \brief Indicates that the kernel should enable the interrupt + * + * Updates the interrupt class's lockbox to indicate that the kernel should leave this interrupt enabled. + * This does not hard-enable the interrupts for this classid (see Kernel_HardEnableInterruptClass). + * + * The lockbox values have the following meanings: + * 0: Interrupts for this classid are enabled + * 1: Interrupts for this classid are logically disabled. + * If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt. 
+ * 2: Interrupts for this classid are hard-disabled. The interrupt will not disturb the core. + * + * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings. + * \note The kernel is responsible for incrementing the lockbox counter when the interrupt is hard-disabled. + * \note There is potential race condition that must be avoided in the kernel. The kernel will need to Query the lockbox when an interrupt occurs, and if it is non-zero, then increment it (another core could enable the interrupt class between those 2 events). One solution is to always FetchAndInc, but that may lead to an extraneous (but rare) FlushInterruptState() call, followed by a FetchAndDec if zero. There are fancier solutions as well. + * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines + * + */ +__INLINE__ uint32_t Kernel_EnableInterruptClass(LockBox_Counter_t classid) +{ + return ( LockBox_FetchAndDec(classid) ); +} + +/*! \brief Indicates that the kernel should hard enable the interrupt + * + * Updates the interrupt class's lockbox to indicate that the kernel has hard-enabled this interrupt. + * If the kernel has actually disabled the interrupt, this SPI will enable the interrupt by using the + * Kernel_FlushInterruptState() SPI. + * + * The lockbox values have the following meanings: + * 0: Interrupts for this classid are enabled + * 1: Interrupts for this classid are logically disabled. + * If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt. + * 2: Interrupts for this classid are hard-disabled. The interrupt will not disturb the core. + * + * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings. + * \note The kernel is responsible for incrementing the lockbox counter when the interrupt is disabled. + * \note There is potential race condition that must be avoided in the kernel. 
The kernel will need to Query the lockbox when an interrupt occurs, and if it is non-zero, then increment it (another core could enable the interrupt class between those 2 events). One solution is to always FetchAndInc, but that may lead to an extranous (but rare) FlushInterruptState() call, followed by a FetchAndDec if zero. There are fancier solutions as well. + * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines + * + */ +__INLINE__ void Kernel_HardEnableInterruptClass(LockBox_Counter_t classid) +{ + LockBox_FetchAndDec(classid); + Kernel_FlushInterruptState(); +} + +/*! \brief Delivers an interrupt to the cores specified in the mask + * \param[in] coremask Bitmask describing which processor cores will receive the interrupt. Processor 0 is the least significant bit (1<<0 in C parlance). Processor 3 is 1<<3. Any combination of processors can be interrupted. + * \note It is possible to interrupt yourself. + */ +__INLINE__ int Kernel_DeliverCommSignal(uint32_t ipiset, uint32_t coremask) +{ + int rc = 0; + asm __volatile__ ("li 0,%1;" + "mr 3, %2;" + "mr 4, %3;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_DELIVERCOMMSIGNAL), + "r" (ipiset), + "r" (coremask) + : "r0", "r3", "r4", "cc", "memory" ); + return rc; +} + +/*! + * \brief Suspends/Resumes a core + * + * \param[in] target core ID + * \param[in] suspend Boolean. TRUE if core is to be suspended + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * \note In a threaded application, use care to avoid suspending a thread containing a lock needed by the active thread. (e.g., if the other core is performing a printf, it may have the glibc io subsystem locked with a mutex. 
If that happens, the main thread may deadlock if it also happens to call printf) + * + */ +__INLINE__ int Kernel_ChangeCoreEnables(uint32_t target_core, uint32_t suspend) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_CHGCOREENABLES), + "r" (target_core), + "r" (suspend) + : "r0", "r3", "r4", "cc", "memory" ); + return( rc ); +} + +/*! \brief Persistent Shared Memory interface to application. Currently, simply a wrapper to open(2), + * with a prefix of /dev/persist + */ +__INLINE__ int persist_open( char *name, int oflag, mode_t mode ) +{ + char pathName[PATH_MAX]; + strcpy(pathName, "/dev/persist/"); + strncat(pathName, name, PATH_MAX - strlen("/dev/persist/") - 1); + return open(pathName, oflag, mode); +} + +/*! \brief Memory region types that can be used for Kernel_GetMemorySize. +*/ +enum KERNEL_MEMSIZETYPE +{ + KERNEL_MEMSIZE_SHARED = 200, /*!< Size in bytes of shared memory */ + KERNEL_MEMSIZE_PERSIST, /*!< Size in bytes of persistent memory */ + KERNEL_MEMSIZE_HEAPAVAIL, /*!< Size in bytes of available heap (must be process leader (a.k.a main) thread) */ + KERNEL_MEMSIZE_ESTHEAPAVAIL, /*!< Estimated size in bytes of the available heap */ + KERNEL_MEMSIZE_STACKAVAIL, /*!< Size in bytes available to the process leader's stack. (must be process leader (a.k.a. main) thread) */ + KERNEL_MEMSIZE_ESTSTACKAVAIL, /*!< Estimated size in bytes available to the process leader's stack */ + KERNEL_MEMSIZE_STACK, /*!< Size in bytes of the process leader's stack (must be process leader (a.k.a. main) thread) */ + KERNEL_MEMSIZE_ESTSTACK, /*!< Estimated size in bytes available to the process leader's stack */ + KERNEL_MEMSIZE_HEAP, /*!< Size in bytes of the heap size*/ + KERNEL_MEMSIZE_GUARD /*!< Size in bytes of the heap guardpage */ +}; + +/*! \brief Returns size of the specified memory region. 
+ */ + +__INLINE__ int Kernel_GetMemorySize(enum KERNEL_MEMSIZETYPE type, uint32_t* size) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_GETMEMSIZE), + "r" (type), + "r" (size) + : "r0", "r3", "r4", "cc", "memory" ); + return( rc ); +} + +/*! \brief Sets a virtual memory window for the process based on a user supplied physical address and tlb slot + */ + +__INLINE__ int Kernel_SetProcessWindow(int tlbslot, uint64_t window_paddr, size_t window_reqsize, uint32_t window_permissions, + uint32_t* window_actualvaddr, uint64_t* window_actualpaddr, size_t* window_actualsize) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + /* need to divide up the uint64 so we can setup the uint32 registers */ + uint32_t window_paddr_h = window_paddr >> 32; + uint32_t window_paddr_l = (uint32_t)window_paddr; + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "mr 7,%6;" + "mr 8,%7;" + "mr 9,%8;" + "mr 10,%9;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_SETPRWINDOW), + "r" (tlbslot), + "r" (window_paddr_h), + "r" (window_paddr_l), + "r" (window_reqsize), + "r" (window_permissions), + "r" (window_actualvaddr), + "r" (window_actualpaddr), + "r" (window_actualsize) + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" ); + return( rc ); +} + +/*! \brief Returns size of the process memory window that was set by the _SetProcessWindow. 
+ */ + +__INLINE__ int Kernel_GetProcessWindow(int tlbslot, + uint32_t* window_actualvaddr, uint64_t* window_actualpaddr, size_t* window_actualsize) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "mr 5,%4;" + "mr 6,%5;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_GETPRWINDOW), + "r" (tlbslot), + "r" (window_actualvaddr), + "r" (window_actualpaddr), + "r" (window_actualsize) + : "r0", "r3", "r4", "r5", "r6", "cc", "memory" ); + return( rc ); +} + +/*! \brief Returns the range of available TLB slots for use by Kernel_SetProcessWindow + */ + +__INLINE__ int Kernel_GetProcessWindowSlotRange(int *minslot, int *maxslot) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "mr 3,%2;" + "mr 4,%3;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_GETPRWINDOWSLOTS), + "r" (minslot), + "r" (maxslot) + : "r0", "r3", "r4", "cc", "memory" ); + return( rc ); +} + +/*! \brief Returns the number of Active Processes in the node (np adjusted) + * + * \return Processor Count + * \retval 1 one process is active in the node. (SMT or DUAL/VN with -np restrictions) + * \retval 2 two processes active in this node (DUAL or VN with -np restrictions) + * \retval 3 three processes active in this node (VN with -np restrictions) + * \retval 4 four processes active in this node (VN) + */ +__INLINE__ int Kernel_ActiveProcessCount( void ) +{ + int rc = 0; /* this syscall returns RC in r3 and does not use errno */ + + asm __volatile__ ("li 0,%1;" + "sc;" + "mr %0, 3;" + : "=&r" (rc) /* early clobber */ + : "i" (_BGP_SYSCALL_NR_ACTIVEPROCESSCOUNT) + : "r0", "r3", "cc", "memory" ); + return( rc ); +} + + +#if SPI_DEPRECATED + +/* ! \see Kernel_PhysicalProcessorID */ +#define BGP_PhysicalProcessorID Kernel_PhysicalProcessorID + +/* ! 
\see Kernel_Virtual2Physical */ +#define _bgp_Virtual2Physical Kernel_Virtual2Physical + +/* ! \see Kernel_GetPersonality */ +#define rts_get_personality(p,s) Kernel_GetPersonality(p,s) + +/* ! \see Kernel_PhysicalProcessorID */ +#define rts_get_processor_id() Kernel_PhysicalProcessorID() + +/* ! \see Kernel_GetProcessorVersion */ +#define rts_get_processor_version() Kernel_GetProcessorVersion() +#endif + +__END_DECLS + + +#endif /* ! __LINUX__ */ + + +#endif /* Add nothing below this line */ diff --git a/arch/powerpc/include/spi/linux_interface.h b/arch/powerpc/include/spi/linux_interface.h new file mode 100644 index 00000000000000..bb49e1754a4036 --- /dev/null +++ b/arch/powerpc/include/spi/linux_interface.h @@ -0,0 +1,777 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ + + +#ifndef _BGP_SPI_LINUX_INTERFACE_H_ /* Prevent multiple inclusion */ +#define _BGP_SPI_LINUX_INTERFACE_H_ + + +/*! \brief Returns the physical processor ID of the running PPC450 core. 
+ * + * \return Physical processor ID + * \retval 0 Running on processor 0 + * \retval 1 Running on processor 1 + * \retval 2 Running on processor 2 + * \retval 3 Running on processor 3 + */ +extern inline uint32_t Kernel_PhysicalProcessorID( void ) { return 0; } /* ?????? */ + + +/*! \brief Causes a commthread to disappear from the runqueue + * + * \note Kernel does not guarantee that the instruction pointer, stack pointer, and register state are preserved across a poof. + * \note TLS data is preserved across a poof + * \note This SPI is only executable on a comm. thread. + * \warning non-portable pthread API + * \return error indication + * \retval success Does not return. Thread has "poofed" + * \retval -1 Calling thread is not a CommThread, so cannot poof + */ +int pthread_poof_np( void ); + + + + +/*! + * \brief Clears the Full Reception FIFO (DD1 workaround) + * + * This function exists to reset the DMA reception fifos - it is a workaround for DD1 only. It should not be needed in DD2. + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + */ +int Kernel_ClearFullReceptionFifo(void); + + +/*! \brief Generates an InterruptID value + * \param[in] group group of the interrupt. range 0-9. + * \param[in] irq_in_group irq within the group. range 0-31. + * \return Composite value able to be passed to Kernel_SetCommThreadConfig + * \see Kernel_SetCommThreadConfig + */ +#define Kernel_MkInterruptID(group, irq_in_group) ((group<<5)|(irq_in_group&0x1f)) + + +/*! + * \brief Communication Thread interrupt handler function prototype + * + * \param[in] arg1 1st argument to commthread + * \param[in] arg2 2nd argument to commthread + * \param[in] arg3 3rd argument to commthread + */ +typedef void (*Kernel_CommThreadHandler)(uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4); + +/*! 
+ * \brief Interrupt Group Prototype + * + * This data type is used to group interrupts of various devices together + * so they can be enabled or disabled simultaneously. A given interrupt user + * (eg. messaging, QCD, etc) specifies a value of this data type when its + * interrupt resources are allocated. The kernel associates those resources + * with the specified value so when this value is specified on the enable or + * disable interrupts system call, all of the interrupts in the group are + * operated upon. Examples of devices that can be grouped in this way include + * DMA fifos, torus, tree, etc. + * + * \todo The kernel should provide interfaces to allocate a + * Kernel_InterruptGroup_t and deallocate it. + */ +typedef void * Kernel_InterruptGroup_t; + + +/*! \defgroup COMMTHRD_OPCODES CommThread Opcodes + * \{ + * \note Only 1 interrupt route can be specified per opcode + * \note CallFunc, DisableIntOnEntry, EnableIntOnPoof can be specified in any combination + * \note Current support requires that DisableIntOnEntry and EnableIntOnPoof be specified + */ +#define COMMTHRD_OPCODE_DISABLE 0x00 /* !< Interrupt route - Not routed / interrupt disabled */ +#define COMMTHRD_OPCODE_CORE0 0x01 /* !< Interrupt route - Dispatched on core0 */ +#define COMMTHRD_OPCODE_CORE1 0x02 /* !< Interrupt route - Dispatched on core1 */ +#define COMMTHRD_OPCODE_CORE2 0x03 /* !< Interrupt route - Dispatched on core2 */ +#define COMMTHRD_OPCODE_CORE3 0x04 /* !< Interrupt route - Dispatched on core3 */ +#define COMMTHRD_OPCODE_BCAST 0x05 /* !< Interrupt route - Dispatched on all cores */ +#define COMMTHRD_OPCODE_ROUTEMASK 0x0F /* !< Interrupt route mask */ +#define COMMTHRD_OPCODE_CALLFUNC 0x10 /* !< The provided function will be called on the comm. thread */ +#define COMMTHRD_OPCODE_DISABLEINTONENTRY 0x20 /* !< Interrupts using cntrid will be disabled when comm. thread is invoked */ +#define COMMTHRD_OPCODE_ENABLEINTONPOOF 0x40 /* !< Interrupts using cntrid will be enabled when comm. 
thread poofs */ + + +/*! + * \brief Sets kernel data structures needed to dispatch a communications thread + * + * Each interrupt on BGP can be used to launch a communications thread. Since access to the + * interrupt controller is privileged, the function exposes some interrupt control to the + * user application. + * \pre Counter must have been allocated via the LockBox_AllocateCounter() routine. + * \pre It is recommended that Kernel_DisableInteruptClass() be called twice on the counter + * to ensure that the interrupt is disabled until all interrupts for the counter + * have been configured. + * \pre All + * \post After the last call to Kernel_SetCommThreadConfig for the counter, invoke + * Kernel_EnableInterruptClass() and Kernel_HardEnableInterruptClass() on + * that counter to enable the interrupts for that class. + * \see LockBox_AllocateCounter + * \see Kernel_DisableInterruptClass + * \see Kernel_EnableInterruptClass + * \see Kernel_HardEnableInterruptClass + * \note An interrupt can only belong to 1 interrupt class (a.k.a., lockbox counter) + * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings. + * \note Kernel may prevent changing interrupt settings for certain InterruptID values. + * \note If an interrupt fires on a core without a comm. thread, results are not guaranteed. + * \return Completion status of the command. + * \retval 0 no error occurred + * \retval EINVAL invalid parameter + * \param[in] InterruptID Identifies a unique interrupt line. \see Kernel_MkInterruptID + * \param[in] opcode Specifies what operation to perform when the interrupt occurs. 
Valid \ref COMMTHRD_OPCODES + * \param[in] cntrid ID of the lockbox counter used for interrupt enable/disable control + * \param[in] funcptr Function pointer that will be invoked when the interrupt fires + * \param[in] arg1 1st argument to the funcptr when the interrupt fires + * \param[in] arg2 2nd argument to the funcptr when the interrupt fires + * \param[in] arg3 3rd argument to the funcptr when the interrupt fires + * \param[in] arg4 4th argument to the funcptr when the interrupt fires + * + */ +typedef uint32_t* LockBox_Counter_t; /*!< Counter ID definition */ +int Kernel_SetCommThreadConfig(int InterruptID, + int opcode, + LockBox_Counter_t cntrid, + Kernel_CommThreadHandler funcptr, + uint32_t arg1, + uint32_t arg2, + uint32_t arg3, + uint32_t arg4); + + + +/*! \brief Indicates that the kernel should disable the interrupt + * + * Updates the interrupt class's lockbox to indicate that the kernel should disable the interrupt. + * Kernel will disable the interrupt at its leisure, but it should ensure that no communications thread + * is invoked for that interrupt class. + * + * The lockbox values have the following meanings: + * 0: Interrupts for this classid are enabled + * 1: Interrupts for this classid are logically disabled. + * If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt. + * 2: Interrupts for this classid are hard-disabled. The interrupt will not disturb the core. + * + * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings. + * \note Do not disable an already disabled interrupt class. + * \note A disabled interrupt class is disabled for all 4 cores, regardless of mode. + * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines + * + */ +uint32_t Kernel_DisableInterruptClass(LockBox_Counter_t classid); + + + +/*! 
+ * \brief Enables/Disables the counter overflow/underflow interrupts + * + * This function is a wrapper around a system call that can enable or disable the 4 counter overflow/underflow interrupts + * + * \param[in] enable Enable/disable boolean + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + */ +int Kernel_ChgCounterInterruptEnables(uint32_t enable); + + +/* int rts_get_personality( void * pers, size_t size ); */ + + +/*! + * \brief Update mapping info about physically contiguous application memory regions + * ( used only in HPC mode ) + */ +int Kernel_UpdateAppSegmentInfo(void); + + + +/*! + * \brief Internal helper function for virtual to physical address translation + * + */ + +int User_Virtual2Physical(unsigned long vaddr, /* 32bit Virtual start address */ + size_t vsize, /* size in bytes of virtual range */ + uint32_t *ua_out, /* upper 4 Physical Address bits */ + uint32_t *pa_out ); + + +/*! \brief Translate a 32bit Virtual Address to a 36bit Physical Address, returning separated upper and lower parts. + * + * \param[in] pVA 32bit virtual address in the calling process + * \param[in] vsize size in bytes of the virtual range + * \param[out] ua_out upper 4 physical address bits + * \param[out] pa_out lower 32 physical address bits + * \return Error condition for translation + * \retval 0 Successful translation, with ua_out and pa_out filled in + * \retval -1 Invalid Virtual Address for this process, ua_out and pa_out unmodified. + * \retval -2 The range from vaddr to (vaddr+vsize) is not physically contiguous. + * \retval -3 vaddr in Scratch, but no Scratch, or not enough Scratch, is enabled. + * \retval -4 invalid parameter + * + * \warning Supports only Text, Data, Stack, and (optional) eDRAM Scratch translation + * \warning CNK "pagesize" is 1MB. + * \warning Text and Data are virtually contiguous, but not necessarily physically contiguous. 
+ * \todo Does not (currently) support > 4GB DDR space. + * \todo Does not (currently) support Shared Memory Area. + */ +int Kernel_Virtual2Physical( void *pVA, /* input: 32bit Virtual start address */ + size_t vsize, /* input: size in bytes of virtual range */ + uint32_t *ua_out, /* output: upper 4 Physical Address bits */ + uint32_t *pa_out ); /* output: lower 32 Physical Address bits */ + + +/*! + * \brief Query Free DMA Counter Subgroups within a Group + * + * This function is a wrapper around a system call that returns a list of the + * free (available) subgroups within the specified group. + * + * \param[in] type Specifies whether this is an injection or + * reception counter group (DMA_Type_Injection + * or DMA_Type_Reception) + * \param[in] grp Group number being queried (0 to + * DMA_NUM_COUNTER_GROUPS-1) + * \param[out] num_subgroups Pointer to an int where the number of free + * subgroups in the specified group is returned + * \param[out] subgroups Pointer to an array of num_subgroups ints where + * the list of num_subgroups subgroups is returned. + * Each int is the subgroup number + * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). The + * caller must provide space for + * DMA_NUM_COUNTERS_PER_SUBGROUP ints, in case the + * entire counter group is free. + * + * \retval 0 Successful. num_subgroups and subgroups array set as described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \internal This function is not intended to be called directly + * \see DMA_CounterGroupQueryFree() + * \note The kernel may need to synchronize with other cores performing + * allocate or free syscalls. + * + */ +uint32_t Kernel_CounterGroupQueryFree(uint32_t type, + uint32_t group, + uint32_t * num_subgroups, + uint32_t * subgroups); + + +/*! + * \brief Allocate DMA Counters From A Group + * + * This function is a wrapper around a system call that allocates DMA counters + * from the specified group. 
Counters may be allocated in subgroups of + * DMA_NUM_COUNTERS_PER_SUBGROUP counters. Parameters specify how interrupts, + * generated when a counter hits zero, are to be handled. A + * DMA_CounterGroup_t structure is returned for use in other inline + * functions to operate on the allocated counters. + * + * \param[in] type Specifies whether this is an injection or + * reception counter group (DMA_Type_Injection + * or DMA_Type_Reception) + * \param[in] grp Group number whose counters are being allocated + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] num_subgroups Number of subgroups to be allocated from the group + * (1 to DMA_NUM_COUNTERS_PER_SUBGROUP) + * \param[in] subgroups Pointer to an array of num_subgroups ints where + * the list of subgroups to be allocated is provided. + * Each int is the subgroup number + * (0 to num_subgroups-1). + * \param[in] target The core that will receive the interrupt when a + * counter in this allocation hits zero + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] handler A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * counter in this allocation hits zero. This + * function must be coded to take 3 parameters: + * void* A pointer to storage specific to this + * handler. This is the handler_parm + * specified on this allocation function. + * int The counter's subgroup number (0 to + * DMA_NUM_COUNTER_SUBGROUPS-1). + * Note this number spans all groups. + * If handler is NULL, hit-zero interrupts will not + * be enabled for these counters. + * \param[in] handler_parm A pointer to storage that should be passed to the + * interrupt handling function (see handler + * parameter) + * \param[in] interruptGroup A InterruptGroup_t that identifies the + * group of interrupts that the counters being + * allocated will become part of. 
+ * \param[out] cg_ptr Pointer to a structure that is filled in upon + * successful return for use in other inline + * functions to operate on the allocated counters. + * \li counter - Array of software counter + * structures. Each element + * points to the corresponding + * hardware counter in DMA SRAM. + * Pointers are null if not + * allocated). + * Counters are initialized to + * DMA_COUNTER_INIT_VAL, + * disabled, their hit_zero bit + * is off, base and max are NULL. + * \li status_ptr - Points to status area within the + * DMA memory map. + * \li permissions - Bits set for each allocated + * counter + * \li grp_permissions - Permissions for each + * subgroup + * \li group_id - The group number + * \li type - The type of DMA (injection or + * reception) + * + * \retval 0 Successful. Counters allocated and cg_ptr structure filled in as + * described. + * \retval -1 Unsuccessful. errno gives the reason. Nothing has been + * allocated. + * + * \internal This function is not intended to be called directly + * \see DMA_CounterGroupAllocate() + * \note The kernel may need to synchronize with other cores performing queries + * or frees. + * + */ +uint32_t Kernel_CounterGroupAllocate(uint32_t type, + uint32_t group, + uint32_t num_subgroups, + uint32_t * subgroups, + uint32_t target, + uint32_t handler, + uint32_t * handler_parm, + uint32_t interruptGroup, + uint32_t * cg_ptr); + + +/*! + * \brief Free DMA Counters From A Group + * + * This function is a wrapper around a system call that frees DMA counters + * from the specified group. Counters may be freed in subgroups of + * DMA_NUM_COUNTERS_PER_SUBGROUP counters. + * + * \param[in] grp Group number whose counters are being freed + * (0 to DMA_NUM_COUNTER_GROUPS-1) + * \param[in] num_subgroups Number of subgroups to be freed from the group + * (1-DMA_NUM_COUNTERS_PER_SUBGROUP) + * \param[in] subgroups Pointer to an array of num_subgroups ints where + * the list of subgroups to be freed is provided. 
+ * Each int is the subgroup number + * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). + * \param[out] cg_ptr Pointer to the structure previously filled in when + * these counters were allocated. Upon successful + * return, this structure is updated to reflect the + * freed counters: + * \li counter[] - Counter structures Pointers to + * freed counters nulled. + * \li permissions - Bits cleared for each freed + * counter. + * + * \retval 0 Successful. Counters freed and cg_ptr structure updated as + * described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \internal This function is not intended to be called directly + * \see DMA_CounterGroupFree() + * \note The kernel may need to synchronize with other cores performing allocates + * or queries. + */ +uint32_t Kernel_CounterGroupFree( uint32_t group, + uint32_t num_subgroups, + uint32_t * subgroups, + uint32_t * cg_ptr ); + + +/*! + * \brief Query Free DMA InjFifos within a Group + * + * This function is a wrapper around a system call that returns a list of the + * free (available to be allocated) fifos within the specified group. + * + * \param[in] grp Group number being queried + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1) + * \param[out] num_fifos Pointer to an int where the number of free + * fifos in the specified group is returned + * \param[out] fifo_ids Pointer to an array of num_fifos ints where + * the list of free fifos is returned. + * Each int is the fifo number + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). + * The caller must provide space for + * DMA_NUM_INJ_FIFOS_PER_GROUP ints, + * in case the entire fifo group is free. + * + * \retval 0 Successful. num_fifos and fifo_ids array set as described. + * \retval -1 Unsuccessful. errno gives the reason. + * \internal This function is not intended to be called directly + * \see DMA_InjFifoGroupQueryFree() + */ + +uint32_t Kernel_InjFifoGroupQueryFree( uint32_t group, + uint32_t * num_fifos, + uint32_t * fifo_ids); + + +/*! 
+ * \brief Allocate DMA InjFifos From A Group + * + * This function is a wrapper around a system call that allocates specified + * DMA injection fifos from the specified group. Parameters specify whether + * each fifo is high or normal priority, local or non-local, and which torus + * fifos it maps to. A DMA_InjFifoGroup_t structure is returned for + * use in other inline functions to operate on the allocated fifos. + * + * Refer to the interrupt discussion at the top of this include file to see why + * there are no interrupt-related parameters. + * + * \param[in] grp Group number whose DMA injection fifos are being + * allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1) + * \param[in] num_fifos Number of fifos to be allocated from the group + * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP) + * \param[in] fifo_ids Pointer to an array of num_fifos ints where + * the list of fifos to be allocated is provided. + * Each int is the fifo number (0 to num_fifos-1). + * \param[in] priorities Pointer to an array of num_fifos short ints where + * the list of priorities to be assigned to the fifos + * is provided. Each short int indicates the priority + * to be assigned to each of the fifos identified in + * the fifo_ids array (0 is normal, 1 is high priority). + * \param[in] locals Pointer to an array of num_fifos short ints where + * an indication is provided of whether each fifo will + * be used for local transfers (within the same node) + * or torus transfers. Each short int indicates the + * local/non-local attribute to be assigned to each of + * the fifos identified in the fifo_ids array (0 is + * non-local, 1 is local). If 0, the corresponding + * array element in ts_inj_maps indicates which torus + * fifos can be injected. + * \param[in] ts_inj_maps Pointer to an array of num_fifos short ints where + * the torus fifos that can be injected are specified + * for each fifo. 
Each short int specifies which of + * the 8 torus injection fifos can be injected when a + * descriptor is injected into the DMA injection fifo. + * Must be non-zero when the corresponding "locals" + * is 0. + * \param[out] fg_ptr Pointer to a structure that is filled in upon + * successful return for use in other inline functions + * to operate on the allocated fifos. + * \li fifos - Array of fifo structures. Structures + * for allocated fifos are initialized as + * documented below. Structures for + * fifos not allocated by this instance of + * this syscall are initialized to binary + * zeros. Allocated fifos are enabled. + * \li status_ptr - Points to status area within the + * DMA memory map. + * \li permissions - Bits indicating which fifos were + * allocated during this syscall. + * \li group_id - The id of this group. + * + * \retval 0 Successful. Fifos allocated and fg_ptr structure filled in as + * described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \internal This function is not intended to be called directly + * \see DMA_InjFifoGroupAllocate() + * \return The group fifo structure pointed to by fg_ptr is completely + * initialized as follows: + * - status_ptr points to the appropriate fifo group DMA memory map + * - fifo structures array. Fifo structures for fifos not allocated + * during this syscall are initialized to binary zeros. Fifo + * structures for fifos allocated during this syscall are initialized: + * - fifo_hw_ptr points to the DMA memory map for this fifo. The + * hardware start, end, head, and tail are set to zero by the + * kernel. + * - All other fields in the structure are set to zero by the kernel + * except priority, local, and ts_inj_map are set to reflect what + * was requested in the priorities, locals, and ts_inj_maps + * syscall parameters. 
+ * + */ +uint32_t Kernel_InjFifoGroupAllocate( uint32_t group, + uint32_t num_fifos, + uint32_t * fifo_ids, + uint16_t * priorities, + uint16_t * locals, + uint8_t * ts_inj_maps, + uint32_t * fg_ptr ); + + + +/*! + * \brief Free DMA InjFifos From A Group + * + * This function is a wrapper around a system call that frees DMA injection + * counters from the specified group. + * + * \param[in] grp Group number whose DMA injection fifos are being + * freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1) + * \param[in] num_fifos Number of fifos to be freed from the group + * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP) + * \param[in] fifo_ids Pointer to an array of num_fifos ints where + * the list of fifos to be freed is provided. + * Each int is the fifo number (0 to num_fifos-1). + * \param[in] fg_ptr Pointer to the structure previously filled in when + * these fifos were allocated. Upon successful + * return, this structure is updated to reflect the + * freed fifos: + * \li fifos - Structures for freed fifos zero'd. + * Freed fifos are disabled. + * \li permissions - Bits cleared for each freed fifo. + * + * \retval 0 Successful. Fifos freed and fg_ptr structure updated as described. + * \retval -1 Unsuccessful. errno gives the reason. + * + * \internal This function is not intended to be called directly + * \see DMA_InjFifoGroupFree() + * \note This is a fatal error if any of the fifos are non empty and activated + * + */ +uint32_t Kernel_InjFifoGroupFree(uint32_t group, + uint32_t num_fifos, + uint32_t * fifo_ids, + uint32_t * fg_ptr); + + + +/*! + * \brief DMA InjFifo Initialization By Id + * + * - For an allocated injection DMA fifo, initialize its start, head, tail, and + * end. + * - Compute fifo size and free space. + * - Initialize wrap count. + * - Activate the fifo. + * + * \param[in] fg_ptr Pointer to fifo group structure. + * \param[in] fifo_id Id of the fifo to be initialized + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). 
+ * \param[in] va_start Virtual address of the start of the fifo. + * \param[in] va_head Virtual address of the head of the fifo (typically + * equal to va_start). + * \param[in] va_end Virtual address of the end of the fifo. + * + * \retval 0 Successful. + * \retval -1 Unsuccessful. Error checks include + * - va_start < va_end + * - va_start <= va_head <= + * (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) + * - va_start and va_end are 32-byte aligned + * - fifo_size is larger than (DMA_MIN_INJECT_SIZE_IN_QUADS + + * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) + * + */ +uint32_t Kernel_InjFifoInitById(uint32_t * fg_ptr, + int fifo_id, + uint32_t * va_start, + uint32_t * va_head, + uint32_t * va_end); + + + +/*! + * \brief Set DMA Reception Fifo Map + * + * This function is a wrapper around a system call that + * - Sets DCRs establishing the map between the hardware torus fifos and the + * DMA reception fifos that are to receive the packets from those hardware + * torus fifos. + * - Sets DCRs establishing the DMA reception fifos that are to receive + * local transfer packets. + * - Sets the DCRs establishing the type (0 or 1) of each reception fifo. + * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos. + * - Leaves all of the fifos that are used in a "disabled" state. + * DMA_RecFifoInitById() initializes and enables the fifos. + * + * \param[in] rec_map Reception Fifo Map structure, defining the mapping. + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + * \internal This is an internal syscall + * \see DMA_RecFifoSetMap + * \note This function should be called once per job, after DMA_ResetRelease(). + * It may be called by any core, but once a core has called it, other + * calls by that same core or any other core will fail. 
+ * + * \note During job init, the kernel sets up the DCR clear masks for each + * reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear + * a fifo in group g only clears group g. + * + */ +int Kernel_RecFifoSetMap(uint32_t* rec_map); + + +/*! + * \brief Get DMA Reception Fifo Map + * + * This function is a wrapper around a system call that returns a DMA + * reception fifo map structure, filled in according to the DCRs. + * + * \param[in,out] rec_map A pointer to a Reception Fifo Map structure + * that will be filled-in upon return. + * + * \retval 0 Successful + * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes + * enum located in bgp/arch/include/common/bgp_ras.h + * + */ +int Kernel_RecFifoGetMap(uint32_t* rec_map); + + + +/*! + * \brief Get DMA Reception Fifo Group + * + * This is a wrapper around a System Call. This function returns THE + * one-and-only pointer to the fifo group structure, with the entries all + * filled in from info in the DCRs. If called multiple times with the same + * group, it will always return the same pointer, and the system call will + * not be invoked again. + * + * It must be called AFTER DMA_RecFifoSetMap(). + * + * By convention, the same "target" is used for normal and header fifo + * interrupts (could be changed). In addition, by convention, interrupts for + * fifos in group g come out of the DMA as non-fatal irq bit 28+g, + * ie, only fifos in group g can cause the "type g" threshold interrupts. + * + * \param[in] grp The group number (0 through DMA_NUM_REC_FIFO_GROUPS). + * \param[in] target The core that will receive the interrupt when a + * fifo in this group reaches its threshold + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * Ignored on subsequent call with the same group. + * \param[in] normal_handler A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * normal fifo in this group reaches its threshold. 
+ * This function must be coded to take 2 parameters: + * void* A pointer to storage specific to this + * handler. This is the normal_handler_parm + * specified on this function call. + * int The global fifo ID of the fifo that hit + * its threshold (0 through + * NUM_DMA_REC_FIFOS-1). + * If normal_handler is NULL, threshold interrupts + * are not delivered for normal fifos in this group. + * Ignored on subsequent call with the same group. + * \param[in] normal_handler_parm A pointer to storage that should be passed + * to the normal interrupt handling function + * (see normal_handler parameter). + * Ignored on subsequent call with the same + * group. + * \param[in] header_handler A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * header fifo in this group reaches its threshold. + * This function must be coded to take 2 parameters: + * void* A pointer to storage specific to this + * handler. This is the header_handler_parm + * specified on this function call. + * int The global fifo ID of the fifo that hit + * its threshold (0 through + * NUM_DMA_REC_FIFOS-1). + * If header_handler is NULL, threshold interrupts + * are not delivered for header fifos in this group. + * Ignored on subsequent call with the same group. + * \param[in] header_handler_parm A pointer to storage that should be passed + * to the header interrupt handling function + * (see header_handler parameter). + * Ignored on subsequent call with the same + * group. + * \param[in] interruptGroup A InterruptGroup_t that identifies the + * group of interrupts that the fifos in this group + * will become part of. + * Ignored on subsequent call with the same group. + * + * \return RecFifoGroupStruct Pointer to a DMA Reception Fifo Group structure + * that reflects the fifos that are being used in + * this group. This same structure is shared by + * all users of this reception fifo group. + * NULL is returned if an error occurs. 
+ * + * \note The following comments from Phil about the internals of the syscall: + * - error checks + * - 0 <= group_id < 4 + * - the start of the fifo group is a valid virtual address (tlb mapped)? + * - disable the rDMA + * - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information + * - loop through the map to determine how many and which fifos in this group + * are used, including headers + * - filling in the addresses of used fifos + * - In particular, any pointer to any fifo in the group that is not used + * will have a null pointer + * - furthermore, + * - write starting values to all used fifos + * - make sure all interrupts are cleared + * - enable rDMA + * + */ +int Kernel_RecFifoGetFifoGroup( + uint32_t * fifogroup, + int grp, + int target, + void * normal_handler, + void * normal_handler_parm, + void * header_handler, + void * header_handler_parm, + void * interruptGroup + ); + + + +/*! + * \brief DMA RecFifo Initialization By Id + * + * - For a DMA reception fifo, initialize its start, head, tail, and end. + * - Compute fifo size and free space. + * + * \param[in] fg_ptr Pointer to fifo group structure. + * \param[in] fifo_id Id of the fifo to be initialized + * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1). + * \param[in] va_start Virtual address of the start of the fifo. + * \param[in] va_head Virtual address of the head of the fifo (typically + * equal to va_start). + * \param[in] va_end Virtual address of the end of the fifo. + * + * \retval 0 Successful. + * \retval -1 Unsuccessful. 
Error checks include + * - va_start < va_end + * - va_start <= va_head < va_end + * - va_start and va_end are 32-byte aligned + * - fifo_size >= DMA_MIN_REC_FIFO_SIZE_IN_BYTES + * + */ +int Kernel_RecFifoInitById( uint32_t * fg_ptr, + int fifo_id, + void * va_start, + void * va_head, + void * va_end ); + + + + +#endif /* Add nothing below this line */ diff --git a/arch/powerpc/include/spi/linux_kernel_spi.h b/arch/powerpc/include/spi/linux_kernel_spi.h new file mode 100644 index 00000000000000..05d32f88d8cb2b --- /dev/null +++ b/arch/powerpc/include/spi/linux_kernel_spi.h @@ -0,0 +1,113 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + ********************************************************************/ + + +#ifndef _LINUX_KERNEL_SPI_H_ /* Prevent multiple inclusion */ +#define _LINUX_KERNEL_SPI_H_ + +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/string.h> + +#include <common/bgp_personality.h> + +#ifndef __LINUX_KERNEL__ +#define __LINUX_KERNEL__ +#endif + +#ifndef __BGP_HIDE_STANDARD_TYPES__ +#define __BGP_HIDE_STANDARD_TYPES__ +#endif + + +/* this comes from src/arch/ppc/platforms/4xx/bluegene.c */ +extern int bluegene_getPersonality(void *buf, int bufsize); +#define rts_get_personality(p,s) bluegene_getPersonality(p,s) + + +/* Lockbox used by DMA_InjFifoRgetFifoFullInit ... */ +#define LockBox_FetchAndClear(x) + + +/* asm inlines used by dma spi */ + +#define _bgp_msync(void) asm volatile ("msync" : : : "memory") +#define _bgp_mbar(void) asm volatile ("mbar" : : : "memory") +#define _bgp_isync(void) asm volatile ("isync" : : : "memory") +extern inline void _bgp_msync_nonspeculative( void ) +{ + do { + asm volatile (" b 1f;" + " nop;" + "1: msync;" + : : : "memory"); + } + while(0); +} + +#define _bgp_QuadLoad(v,f) asm volatile( "lfpdx " #f ",0,%0" :: "r" (v) : "fr" #f ) +#define _bgp_QuadStore(v,f) asm volatile( "stfpdx " #f ",0,%0" :: "r" (v) : "memory" ) + +#define _bgp_dcache_touch_line(v) do { asm volatile ("dcbt 0,%0" : : "r" (v)); } while(0) + +/* in ppc450_inlines.h */ +/* #define _bgp_msync_nonspeculative(x) */ +/* { */ +/* do { */ +/* asm volatile (" b 1f;" */ +/* " nop;" */ +/* "1: msync;" */ +/* : : : "memory"); */ +/* } */ +/* while(0); */ +/* } */ + +/* assert and printf variants for kernel use */ + +#define assert(x) if ( !(x)) printk( KERN_ALERT "(E) bgpdma assertion at %s:%d\n",__FILE__,__LINE__); + +#define SPI_assert(x) assert(x) + +#define printf(...) 
printk(KERN_INFO __VA_ARGS__) + +/* we need a dummy errno for linking */ +static int errno; + +/* general bgp quad struct */ +/* (better one in bgp_types.h , use that in preference) */ +/* typedef struct { u32 w[4]; } __attribute__ ((aligned(16))) _bgp_QuadWord_t; */ + + +/* virtual base address of the DMA (see bgp_dma_memap.h) */ +#define _BGP_VA_DMA bgpdma_kaddr + +#include <asm/bgp_personality.h> +#include <common/alignment.h> +#include <bpcore/bgp_dma_memmap.h> +#include <bpcore/ic_memmap.h> + +#include <spi/DMA_Counter.h> +#include <spi/DMA_Fifo.h> +#include <spi/DMA_InjFifo.h> +#include <spi/DMA_RecFifo.h> + + + +#endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 8d1a419df35d78..14037caa20c0a9 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -4,6 +4,8 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' +EXTRA_CFLAGS = + ifeq ($(CONFIG_PPC64),y) CFLAGS_prom_init.o += -mno-minimal-toc endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 923f87aff20a45..65c67c44d830e0 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1634,6 +1634,19 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* Blue Gene/P */ + .pvr_mask = 0xfffffff0, + .pvr_value = 0x52131880, + .cpu_name = "450 Blue Gene/P", + .cpu_features = CPU_FTRS_440x6, + .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_460gt, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index a088c064ae4055..33e2edd41d79eb 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -29,7 +29,7 @@ BEGIN_FTR_SECTION \ b 2f; \ 
END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ - REST_32FPRS(n,base); \ + REST_32FPRS(n,c,base); \ b 3f; \ 2: REST_32VSRS(n,c,base); \ 3: @@ -38,13 +38,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ BEGIN_FTR_SECTION \ b 2f; \ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ - SAVE_32FPRS(n,base); \ + SAVE_32FPRS(n,c,base); \ b 3f; \ 2: SAVE_32VSRS(n,c,base); \ 3: #else -#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) -#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) +#define REST_32FPVSRS(n,b,base) REST_32FPRS(n,b,base) +#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n,b,base) #endif /* diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index b56fecc93a16c6..9ef78498efaf9a 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -143,7 +143,11 @@ skpinv: addi r4,r4,1 /* Increment */ sync /* Initialize MMUCR */ +#ifdef CONFIG_L1_WRITETHROUGH + lis r5,PPC44x_MMUCR_U2SWOAE@h +#else li r5,0 +#endif mtspr SPRN_MMUCR,r5 sync @@ -158,7 +162,12 @@ skpinv: addi r4,r4,1 /* Increment */ /* attrib fields */ /* Added guarded bit to protect against speculative loads/stores */ li r5,0 +#ifdef CONFIG_L1_WRITETHROUGH + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M | PPC44x_TLB_U2) + oris r5,r5,PPC44x_TLB_WL1@h +#else ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) +#endif li r0,63 /* TLB slot 63 */ @@ -228,6 +237,14 @@ skpinv: addi r4,r4,1 /* Increment */ lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ mtspr SPRN_IVPR,r4 +#ifdef CONFIG_SMP + /* are we an additional CPU? */ + li r0, 0 + mfspr r4, SPRN_PIR + cmpw r4, r0 + bgt secondary_entry +#endif + /* * This is where the main kernel code starts. */ @@ -278,6 +295,70 @@ skpinv: addi r4,r4,1 /* Increment */ mtspr SPRN_SRR1,r3 rfi /* change context and jump to start_kernel */ + +#ifdef CONFIG_SMP +/* Extra cpus will come here. 
*/ +//#define _GLOBAL_DEVINIT(n) \ +// .section .text.devinit; \ +// .text; \ +// .stabs __stringify(n:F-1),N_FUN,0,0,n;\ +// .globl n; \ +//n: +// +//_GLOBAL_DEVINIT(secondary_entry) +secondary_entry: + /* Enable U2SWOA. U2 will be enabled in TLBs. */ + lis r7,PPC44x_MMUCR_U2SWOAE@h + mtspr SPRN_MMUCR,r7 + li r7,0 + mtspr SPRN_PID,r7 + sync + lis r8,KERNELBASE@h + + /* The tlb_44x_hwater global var (setup by cpu#0) reveals how many + * 256M TLBs we need to map. + */ + lis r9, tlb_44x_hwater@ha + lwz r9, tlb_44x_hwater@l(r9) + + li r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M|PPC44x_TLB_U2) + oris r5, r5, PPC44x_TLB_WL1@h + +2: addi r9,r9,1 + cmpwi r9,62 /* Stop at entry 62 which is the firmware */ + beq 3f + addis r7,r7,0x1000 /* add 256M */ + addis r8,r8,0x1000 + ori r6,r8,PPC44x_TLB_VALID | PPC44x_TLB_256M + + tlbwe r6,r9,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r7,r9,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r9,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + b 2b + +3: isync + + /* Setup context from global var secondary_ti */ + lis r1, secondary_ti@ha + lwz r1, secondary_ti@l(r1) + lwz r2, TI_TASK(r1) /* r2 = task_info */ + + addi r3,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r3 + + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + /* Let's move on */ + lis r4,start_secondary@h + ori r4,r4,start_secondary@l + lis r3,MSR_KERNEL@h + ori r3,r3,MSR_KERNEL@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi /* change context and jump to start_secondary */ +#endif + /* * Interrupt vector entry code * @@ -588,7 +669,15 @@ finish_tlb_load: andi. r10,r12,_PAGE_USER /* User page ? */ beq 1f /* nope, leave U bits empty */ rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */ -1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ +1: +#ifdef CONFIG_L1_WRITETHROUGH + andi. 
r10, r11, PPC44x_TLB_I + bne 2f + oris r11,r11,PPC44x_TLB_WL1@h /* Add coherency for non-inhibited */ + ori r11,r11,PPC44x_TLB_SWOA|PPC44x_TLB_M +2: +#endif + tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */ /* Done...restore registers and get out of here. */ diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 15f28e0de78dae..7282d2f4d40110 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -491,6 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) * * void clear_pages(void *page, int order) ; */ +#if !defined(CONFIG_WRAP_COPY_TOFROM_USER) _GLOBAL(clear_pages) li r0,PAGE_SIZE/L1_CACHE_BYTES slw r0,r0,r4 @@ -502,12 +503,25 @@ _GLOBAL(clear_pages) stw r4, 8(r3) stw r4, 12(r3) #else +#ifdef CONFIG_L1_WRITETHROUGH + /* assuming 32 byte cacheline */ + li r4, 0 +1: stw r4, 0(r3) + stw r4, 4(r3) + stw r4, 8(r3) + stw r4, 12(r3) + stw r4, 16(r3) + stw r4, 20(r3) + stw r4, 24(r3) + stw r4, 28(r3) +#else 1: dcbz 0,r3 #endif +#endif addi r3,r3,L1_CACHE_BYTES bdnz 1b blr - +#endif /* * Copy a whole page. 
We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of @@ -524,6 +538,7 @@ _GLOBAL(clear_pages) stw r8,12(r3); \ stwu r9,16(r3) +#if !defined(CONFIG_WRAP_COPY_TOFROM_USER) _GLOBAL(copy_page) addi r3,r3,-4 addi r4,r4,-4 @@ -556,7 +571,9 @@ _GLOBAL(copy_page) mtctr r0 1: dcbt r11,r4 +#ifndef CONFIG_L1_WRITETHROUGH dcbz r5,r3 +#endif COPY_16_BYTES #if L1_CACHE_BYTES >= 32 COPY_16_BYTES @@ -578,6 +595,7 @@ _GLOBAL(copy_page) li r11,4 b 2b #endif /* CONFIG_8xx */ +#endif /* CONFIG_WRAP_COPY_TOFROM_USER */ /* * void atomic_clear_mask(atomic_t mask, atomic_t *addr) diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 65484b2200b36a..e39ab04671c0f9 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -489,6 +489,14 @@ int __devinit start_secondary(void *unused) current->active_mm = &init_mm; smp_store_cpu_info(cpu); + +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) + /* Clear any pending timer interrupts */ + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); + + /* Enable decrementer interrupt */ + mtspr(SPRN_TCR, TCR_DIE); +#endif set_dec(tb_ticks_per_jiffy); preempt_disable(); cpu_callin_map[cpu] = 1; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 8db35278a4b436..886a01bc6a65af 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -29,3 +29,4 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o obj-y += code-patching.o obj-y += feature-fixups.o obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o +obj-$(CONFIG_WRAP_COPY_TOFROM_USER) += copy_tofrom_user.o diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index c657de59abca8c..a774f03fb7b3cd 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -98,7 +98,7 @@ _GLOBAL(cacheable_memzero) bdnz 4b 3: mtctr r9 li r7,4 -#if !defined(CONFIG_8xx) +#if !defined(CONFIG_8xx) && !defined(CONFIG_L1_WRITETHROUGH) 10: dcbz r7,r6 #else 10: stw r4, 4(r6) 
@@ -200,7 +200,7 @@ _GLOBAL(cacheable_memcpy) mtctr r0 beq 63f 53: -#if !defined(CONFIG_8xx) +#if !defined(CONFIG_8xx) && !defined(CONFIG_L1_WRITETHROUGH) dcbz r11,r6 #endif COPY_16_BYTES @@ -318,7 +318,11 @@ _GLOBAL(backwards_memcpy) mtctr r7 b 1b +#if defined(CONFIG_WRAP_COPY_TOFROM_USER) +_GLOBAL(__real__copy_tofrom_user) +#else _GLOBAL(__copy_tofrom_user) +#endif addi r4,r4,-4 addi r6,r3,-4 neg r0,r3 @@ -391,7 +395,11 @@ _GLOBAL(__copy_tofrom_user) mtctr r8 53: dcbt r3,r4 +#ifdef CONFIG_L1_WRITETHROUGH +54: +#else 54: dcbz r11,r6 +#endif .section __ex_table,"a" .align 2 .long 54b,105f diff --git a/arch/powerpc/lib/copy_tofrom_user.c b/arch/powerpc/lib/copy_tofrom_user.c new file mode 100644 index 00000000000000..525da59b188cff --- /dev/null +++ b/arch/powerpc/lib/copy_tofrom_user.c @@ -0,0 +1,19 @@ +#include <linux/kernel.h> + +extern unsigned long __real__copy_tofrom_user(void *to, + const void __user *from, unsigned long size) ; + +#if defined(CONFIG_BGP_TORUS) +extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void *to, + const void __user *from, unsigned long size) ; +#endif + +unsigned long __copy_tofrom_user(void *to, + const void __user *from, unsigned long size) +{ +#if defined(CONFIG_BGP_TORUS) + int rc=bgp_fpu_instrument_copy_tofrom_user(to, from, size) ; + if( 0 == rc) return 0 ; +#endif + return __real__copy_tofrom_user(to, from, size) ; +} diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 98052ac96580d6..dc1742a4b8ee25 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -64,6 +64,12 @@ static void __init ppc44x_update_tlb_hwater(void) static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) { unsigned int entry = tlb_44x_hwater--; + unsigned attrs = PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX; +#ifdef CONFIG_L1_WRITETHROUGH + attrs |= PPC44x_TLB_WL1 | PPC44x_TLB_SWOA | PPC44x_TLB_M; +#else + attrs |= PPC44x_TLB_G; +#endif ppc44x_update_tlb_hwater(); @@ -72,7 +78,7 @@ static void 
__init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) "tlbwe %1,%3,%5\n" "tlbwe %0,%3,%6\n" : - : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), + : "r" (attrs), "r" (phys), "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), "r" (entry), diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 953cc4a1cde560..c913931927c1ae 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -15,6 +15,8 @@ hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o obj-$(CONFIG_PPC64) += hash_utils_64.o \ slb_low.o slb.o stab.o \ gup.o mmap.o $(hash-y) + +obj-$(CONFIG_BGP) += mmap.o obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ tlb_hash$(CONFIG_WORD_SIZE).o \ diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 91c7b8636b8a75..573d8c4b221e13 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -101,6 +101,73 @@ static int store_updates_sp(struct pt_regs *regs) return 0; } +#ifdef CONFIG_BGP +/* The icbi instruction does not broadcast to all cpus in the ppc450 processor used + * by Blue Gene/P. It is unlikely this problem will be exhibited in other processors + * so this remains ifdef'ed for BGP specifically. + * + * We deal with this by marking executable pages either writable, or executable, but + * never both. The permissions will fault back and forth if the thread is actively + * writing to executable sections. Each time we fault to become executable we flush + * the dcache into icache on all cpus. 
+ */ +struct bgp_fixup_parm { + struct page *page; + unsigned long address; + struct vm_area_struct *vma; +}; +static void bgp_fixup_cache_tlb(void *parm) +{ + struct bgp_fixup_parm *p = parm; + + if (!PageHighMem(p->page)) + flush_dcache_icache_page(p->page); + local_flush_tlb_page(p->vma, p->address); +} + +static void bgp_fixup_access_perms(struct vm_area_struct *vma, + unsigned long address, + int is_write, int is_exec) +{ + struct mm_struct *mm = vma->vm_mm; + pte_t *ptep = NULL; + pmd_t *pmdp; + + if (get_pteptr(mm, address, &ptep, &pmdp)) { + spinlock_t *ptl = pte_lockptr(mm, pmdp); + pte_t old; + + spin_lock(ptl); + old = *ptep; + if (pte_present(old)) { + struct page *page = pte_page(old); + + if (is_exec) { + struct bgp_fixup_parm param = { + .page = page, + .address = address, + .vma = vma, + }; + pte_update(ptep, _PAGE_HWWRITE, 0); + on_each_cpu(bgp_fixup_cache_tlb, ¶m, 1); + pte_update(ptep, 0, _PAGE_HWEXEC); + pte_unmap_unlock(ptep, ptl); + return; + } + if (is_write && + (pte_val(old) & _PAGE_RW) && + (pte_val(old) & _PAGE_DIRTY) && + !(pte_val(old) & _PAGE_HWWRITE)) { + pte_update(ptep, _PAGE_HWEXEC, _PAGE_HWWRITE); + } + } + if (!pte_same(old, *ptep)) + flush_tlb_page(vma, address); + pte_unmap_unlock(ptep, ptl); + } +} +#endif + /* * For 600- and 800-family processors, the error_code parameter is DSISR * for a data fault, SRR1 for an instruction fault. 
For 400-family processors @@ -266,6 +333,7 @@ good_area: !(vma->vm_flags & (VM_READ | VM_WRITE)))) goto bad_area; #else +#ifndef CONFIG_BGP pte_t *ptep; pmd_t *pmdp; @@ -292,6 +360,7 @@ good_area: pte_unmap_unlock(ptep, ptl); } #endif +#endif /* a write */ } else if (is_write) { if (!(vma->vm_flags & VM_WRITE)) @@ -330,6 +399,12 @@ good_area: #endif } else current->min_flt++; + +#ifdef CONFIG_BGP + /* Fixup _PAGE_HWEXEC and _PAGE_HWWRITE if necessary */ + bgp_fixup_access_perms(vma, address, is_write, is_exec); +#endif + up_read(&mm->mmap_sem); return 0; diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 86010fc7d3b17a..2db81290964be7 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -48,11 +48,13 @@ static inline unsigned long mmap_base(void) static inline int mmap_is_legacy(void) { +#if defined(CONFIG_64BIT) /* * Force standard allocation for 64 bit programs. */ if (!test_thread_flag(TIF_32BIT)) return 1; +#endif if (current->personality & ADDR_COMPAT_LAYOUT) return 1; diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 52a0cfc38b6488..ba043c4c564e0f 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -46,7 +46,7 @@ static unsigned int next_context, nr_free_contexts; static unsigned long *context_map; static unsigned long *stale_map[NR_CPUS]; static struct mm_struct **context_mm; -static spinlock_t context_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(context_lock); #define CTX_MAP_SIZE \ (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1)) @@ -73,7 +73,6 @@ static unsigned int steal_context_smp(unsigned int id) struct mm_struct *mm; unsigned int cpu, max; - again: max = last_context - first_context; /* Attempt to free next_context first and then loop until we manage */ @@ -108,7 +107,9 @@ static unsigned int steal_context_smp(unsigned int id) spin_unlock(&context_lock); cpu_relax(); spin_lock(&context_lock); - goto again; + + /* 
This will cause the caller to try again */ + return MMU_NO_CONTEXT; } #endif /* CONFIG_SMP */ @@ -127,12 +128,12 @@ static unsigned int steal_context_up(unsigned int id) pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm); - /* Mark this mm has having no context anymore */ - mm->context.id = MMU_NO_CONTEXT; - /* Flush the TLB for that context */ local_flush_tlb_mm(mm); + /* Mark this mm has having no context anymore */ + mm->context.id = MMU_NO_CONTEXT; + /* XXX This clear should ultimately be part of local_flush_tlb_mm */ __clear_bit(id, stale_map[cpu]); @@ -194,6 +195,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) WARN_ON(prev->context.active < 1); prev->context.active--; } + + again: #endif /* CONFIG_SMP */ /* If we already have a valid assigned context, skip all that */ @@ -212,6 +215,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next) #ifdef CONFIG_SMP if (num_online_cpus() > 1) { id = steal_context_smp(id); + if (id == MMU_NO_CONTEXT) + goto again; goto stolen; } #endif /* CONFIG_SMP */ @@ -272,6 +277,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) */ void destroy_context(struct mm_struct *mm) { + unsigned long flags; unsigned int id; if (mm->context.id == MMU_NO_CONTEXT) @@ -279,18 +285,18 @@ void destroy_context(struct mm_struct *mm) WARN_ON(mm->context.active != 0); - spin_lock(&context_lock); + spin_lock_irqsave(&context_lock, flags); id = mm->context.id; if (id != MMU_NO_CONTEXT) { __clear_bit(id, context_map); mm->context.id = MMU_NO_CONTEXT; #ifdef DEBUG_MAP_CONSISTENCY mm->context.active = 0; - context_mm[id] = NULL; #endif + context_mm[id] = NULL; nr_free_contexts++; } - spin_unlock(&context_lock); + spin_unlock_irqrestore(&context_lock, flags); } #ifdef CONFIG_SMP diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 3496bc05058ed6..f08863a5bd9805 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ 
b/arch/powerpc/platforms/44x/Kconfig @@ -2,7 +2,6 @@ config BAMBOO bool "Bamboo" depends on 44x default n - select PPC44x_SIMPLE select 440EP select PCI help @@ -31,7 +30,6 @@ config SEQUOIA bool "Sequoia" depends on 44x default n - select PPC44x_SIMPLE select 440EPX help This option enables support for the AMCC PPC440EPX evaluation board. @@ -40,7 +38,6 @@ config TAISHAN bool "Taishan" depends on 44x default n - select PPC44x_SIMPLE select 440GX select PCI help @@ -51,7 +48,6 @@ config KATMAI bool "Katmai" depends on 44x default n - select PPC44x_SIMPLE select 440SPe select PCI select PPC4xx_PCI_EXPRESS @@ -62,7 +58,6 @@ config RAINIER bool "Rainier" depends on 44x default n - select PPC44x_SIMPLE select 440GRX select PCI help @@ -81,53 +76,34 @@ config WARP See http://www.pikatechnologies.com/ and follow the "PIKA for Computer Telephony Developers" link for more information. -config ARCHES - bool "Arches" - depends on 44x - default n - select PPC44x_SIMPLE - select 460EX # Odd since it uses 460GT but the effects are the same - select PCI - select PPC4xx_PCI_EXPRESS - help - This option enables support for the AMCC Dual PPC460GT evaluation board. - config CANYONLANDS bool "Canyonlands" depends on 44x default n - select PPC44x_SIMPLE select 460EX select PCI select PPC4xx_PCI_EXPRESS - select IBM_NEW_EMAC_RGMII - select IBM_NEW_EMAC_ZMII help This option enables support for the AMCC PPC460EX evaluation board. -config GLACIER - bool "Glacier" - depends on 44x - default n - select PPC44x_SIMPLE - select 460EX # Odd since it uses 460GT but the effects are the same - select PCI - select PPC4xx_PCI_EXPRESS - select IBM_NEW_EMAC_RGMII - select IBM_NEW_EMAC_ZMII - help - This option enables support for the AMCC PPC460GT evaluation board. - config YOSEMITE bool "Yosemite" depends on 44x default n - select PPC44x_SIMPLE select 440EP select PCI help This option enables support for the AMCC PPC440EP evaluation board. 
+config BGP + bool "Blue Gene/P" + depends on 44x + default y + select BLUEGENE + help + This option enables support for the IBM Blue Gene/P supercomputer. + + #config LUAN # bool "Luan" # depends on 44x @@ -160,21 +136,6 @@ config XILINX_VIRTEX440_GENERIC_BOARD Most Virtex 5 designs should use this unless it needs to do some special configuration at board probe time. -config PPC44x_SIMPLE - bool "Simple PowerPC 44x board support" - depends on 44x - default n - help - This option enables the simple PowerPC 44x platform support. - -config PPC4xx_GPIO - bool "PPC4xx GPIO support" - depends on 44x - select ARCH_REQUIRE_GPIOLIB - select GENERIC_GPIO - help - Enable gpiolib support for ppc440 based boards - # 44x specific CPU modules, selected based on the board above. config 440EP bool @@ -218,6 +179,8 @@ config 460EX bool select PPC_FPU select IBM_NEW_EMAC_EMAC4 + select IBM_NEW_EMAC_RGMII + select IBM_NEW_EMAC_ZMII select IBM_NEW_EMAC_TAH # 44x errata/workaround config symbols, selected by the CPU models above @@ -233,3 +196,185 @@ config XILINX_VIRTEX_5_FXT bool select XILINX_VIRTEX +config BLUEGENE + bool + select PPC_FPU + select PPC_DOUBLE_FPU + +config BLUEGENE_NOISY_BOOT + bool "Send Blue Gene boot messages to the control system" + depends on BLUEGENE + default n + help + Select this if you need to diagnose faults with the IO or Compute node kernel boot. 
+ +config BLUEGENE_MAMBO + bool "Run on Blue Gene/P Mambo Simulator" + depends on BGP + +config L1_WRITETHROUGH + bool "Blue Gene enable writethrough mode" + depends on BLUEGENE + default n + +config BGP_DD1 + bool "Blue Gene enable workarounds for BG/P DD1" + default n + +config BLUEGENE_TCP + bool "Blue Gene/P TCP on Torus" + default y if BGP + +config BLUEGENE_DMA_MEMCPY + bool "Blue Gene copy_tofrom_user optimisation with the torus DMA unit" + depends on BLUEGENE + default n + help + 'copyin/out' via the BGP DMA is believed functional, but seems not useful since copying via the parallel FP regs + seems to run faster, even in cases where that wipes out the L1 cache. Code is left here in case someone wants to + try improving it, and to indicate which sections of the BGP DMA unit (injection fifo and reception counters) are needed + to make it work. + + +config BLUEGENE_COLLECTIVE_TRACE + bool "Activate diagnostic trace in BlueGene/P collective network" + default y if BGP + +config BLUEGENE_TORUS_TRACE + bool "Activate diagnostic trace in BlueGene/P torus network" + default y if BGP + + +config BLUEGENE_TCP_WITHOUT_NAPI + bool "Blue Gene/P TCP interrupt every packet (no NAPI) for debugging" + default n + +config BLUEGENE_UNIPROCESSOR + bool "Force BlueGene to run uniprocessor (450 debugging, or vrnic)" + depends on BLUEGENE + default n + +config BLUEGENE_STATISTICS + bool "Maintain statistics related to BlueGene networking" + depends on BLUEGENE + default y + +config BLUEGENE_SHARE_WITH_VRNIC + bool "Allow vRNIC to map all of Linux memory" + depends on BLUEGENE + default n + +config HUGE_KMALLOC + bool "Allow for 32MB kmalloc blocks" + default y if BGP + +config TASK_UNMAPPED_BASE + hex "Base virtual address for mmap" + depends on BGP + default "0x20000000" + help + processor.h will set this to (TASK_SIZE / 8 * 3) if you do not set it here + +config DEBUG_ALIGNMENT_HISTOGRAM + bool "copy_tofrom_user alignment histograms" + default y + help + Enables maintenance of 
alignment histograms for copy_tofrom_user and similar functions, + to explore whether alternative implementations might be useful for performance. + +config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + default y if BGP + help + Enables the display of the minimum amount of free stack which each + task has ever had available in the sysrq-T and sysrq-P debug output. + + This option will slow down process creation somewhat. + +config BOOKE + bool + depends on 44x + default y + +config IBM_OCP + bool + depends on ASH || BAMBOO || BLUEGENE || BUBINGA || CPCI405 || EBONY || EP405 || LUAN || YUCCA || OCOTEA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT + default y + +config IBM_EMAC4 + bool + depends on 440GX || 440SP || 440SPE || BLUEGENE + default y + + config 405EP + bool + depends on BUBINGA + default y + +# Some of the items below might not be quite riight; I'm putting part of the 2.6.19 Kconfig in here, enough +# to get a BGP build working. tjcw. +config 405GP + bool + depends on CPCI405 || EP405 || WALNUT + default y + +config 405GPR + bool + depends on SYCAMORE + default y + +config STB03xxx + bool + depends on REDWOOD_5 || REDWOOD_6 + default y + +config EMBEDDEDBOOT + bool + depends on EP405 || XILINX_ML300 || XILINX_ML403 + default y + +config IBM_OPENBIOS + bool + depends on ASH || REDWOOD_5 || REDWOOD_6 + default y + +config PPC4xx_DMA + bool "PPC4xx DMA controller support" + depends on 4xx + +config PPC4xx_EDMA + bool + depends on !STB03xxx && PPC4xx_DMA + default y + +config PPC_GEN550 + bool + depends on 4xx + default y + +choice + prompt "TTYS0 device and default console" + depends on 40x + default UART0_TTYS0 + +config UART0_TTYS0 + bool "UART0" + +config UART0_TTYS1 + bool "UART1" + +endchoice + +config SERIAL_SICC + bool "SICC Serial port support" + depends on STB03xxx + +config UART1_DFLT_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y + +config SERIAL_SICC_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + 
default y diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile index 01f51daace1394..3596f55f80f904 100644 --- a/arch/powerpc/platforms/44x/Makefile +++ b/arch/powerpc/platforms/44x/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_EBONY) += ebony.o obj-$(CONFIG_SAM440EP) += sam440ep.o obj-$(CONFIG_WARP) += warp.o obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o +obj-$(CONFIG_BGP) += bgp_cns.o bgp_bic.o bgp.o bgp_pers.o diff --git a/arch/powerpc/platforms/44x/bgp.c b/arch/powerpc/platforms/44x/bgp.c new file mode 100644 index 00000000000000..c180a447d0450d --- /dev/null +++ b/arch/powerpc/platforms/44x/bgp.c @@ -0,0 +1,205 @@ +/* + * Blue Gene/P board specific routines + * + * Todd Inglett <tinglett@us.ibm.com> + * Copyright 2003-2009 International Business Machines, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/init.h> +#include <linux/of_platform.h> +#include <linux/root_dev.h> +#include <linux/delay.h> + +#include <asm/machdep.h> +#include <asm/prom.h> +#include <asm/time.h> +#include <asm/ppc4xx.h> +#include <asm/mmu-44x.h> +#include <asm/smp.h> +#include <asm/cacheflush.h> +#include <asm/bluegene.h> +#include <asm/udbg.h> +#include <asm/bluegene_ras.h> + + +extern int bgWriteRasStr(unsigned int component, + unsigned int subcomponent, + unsigned int errCode, + char* str, + unsigned int strLen); +extern int bgFlushOutboxMsgs(void); + +/* + * bgp_probe() is called very early; cpu 0 only + * one pinned TLB + * device-tree isn't unflattened + * Look to see if the boot wrapper says we are a Blue Gene/P. + * Setup udbg_ptc, but it will do nothing until the CNS interface is initialized. 
+ */ +static int __init bgp_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + if (!of_flat_dt_is_compatible(root, "ibm,bluegenep")) + return 0; + + udbg_putc = bgp_udbg_putc; + return 1; +} + +/* + * There isn't a concept of a kernel asking to be rebooted on Blue Gene. + * The restart, power_off and halt functions should produce RAS to tell the control + * system this node is no longer functional. + */ +static void bgp_halt(void) +{ + bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_halted, "System Halted", 0); + + // Flush halt RAS and any other buffered outbox messages. + while (bgFlushOutboxMsgs()); +} + +static void bgp_panic(char *str) +{ + bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_panic, str, 0); + + // Flush halt RAS and any other buffered outbox messages. + while (bgFlushOutboxMsgs()); +} + +/* Blue Gene is given the decrementor frequency via the device tree (personality). */ +static void __init bgp_calibrate_decr(void) +{ + struct device_node *pernode = of_find_node_by_path("/ibm,bluegene/personality"); + + ppc_tb_freq = 0; + if (pernode) { + int len; + const unsigned *reg = of_get_property(pernode, "frequency", &len); + if (reg) + ppc_tb_freq = *reg; + } + if (ppc_tb_freq == 0) { + udbg_printf("personality/frequency device-tree field not found!\n"); + ppc_tb_freq = 850000000; /* A very good default */ + } + + ppc_proc_freq = ppc_tb_freq; + mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS); + mtspr(SPRN_TCR, TCR_DIE); +} + +/* Generic 44x init disables icache prefetch which can be enabled. */ +static void __init bgp_enable_icache_prefetch(void) +{ + mtspr(SPRN_CCR0, mfspr(SPRN_CCR0)|2); + isync(); + mb(); +} + +#ifdef CONFIG_SMP +/* + * The Blue Gene interrupt controller (in bgp_bic.c) can implement + * sending IPIs with a cpumask. Consider changing this interface. 
+ */ +static void smp_bluegene_message_pass(int target, int msg) +{ + unsigned int i; + + if (target < NR_CPUS) { + bgp_send_ipi(target, msg); + } else { + for_each_online_cpu(i) { + if (target == MSG_ALL_BUT_SELF + && i == smp_processor_id()) + continue; + bgp_send_ipi(i, msg); + } + } +} + + +/* Return number of cpus possible in the system. + * We wire this to 4 even though it may disagree with NR_CPUS. + * + * Also a good time to register the IPI interrupt handlers. + * The cpu_present_map was already setup via setup_arch, so we use it. + */ +static int smp_bluegene_probe(void) +{ + return cpus_weight(cpu_possible_map); +} + +/* + * Start a cpu by calling firmware. + */ +static void smp_bluegene_kick_cpu(int cpu) +{ + int ret = bluegene_takeCPU(cpu, 0, (void (*)(unsigned, void *))4); + if (ret == 0) { + cpu_set(cpu, cpu_present_map); + } else { + udbg_printf("CPU %d is not available (firmware returns %d)\n", cpu, ret); + } +} + +/* + * Each secondary cpu needs some initialization. + */ +static void __init smp_bluegene_setup_cpu(int nr) +{ + int cpu = smp_processor_id(); + + flush_instruction_cache(); + bgp_enable_icache_prefetch(); + bgp_init_cns(); /* map CNS for this cpu */ + + bgp_init_IPI(cpu, PPC_MSG_CALL_FUNCTION); + bgp_init_IPI(cpu, PPC_MSG_RESCHEDULE); + bgp_init_IPI(cpu, PPC_MSG_CALL_FUNC_SINGLE); + bgp_init_IPI(cpu, PPC_MSG_DEBUGGER_BREAK); +} + +static struct smp_ops_t bluegene_smp_ops = { + .message_pass = smp_bluegene_message_pass, + .probe = smp_bluegene_probe, + .kick_cpu = smp_bluegene_kick_cpu, + .setup_cpu = smp_bluegene_setup_cpu, +}; +#endif + +/* + * Initialize CNS (Common Node Services) in bgp_cns.c. + * Once we have initialized CNS, we can crudely print messages with + * udbg_printf(). 
+ */ +static void __init bgp_setup_arch(void) +{ + ROOT_DEV = Root_RAM0; + + bgp_enable_icache_prefetch(); + bgp_init_cns(); +#ifdef CONFIG_SMP + smp_ops = &bluegene_smp_ops; +#endif +} + +define_machine(bgp) { + .name = "bgp", + .probe = bgp_probe, + .setup_arch = bgp_setup_arch, + .init_IRQ = bgp_init_IRQ, + .get_irq = bgp_get_irq, + .restart = (void (*)(char *))bgp_halt, + .power_off = bgp_halt, + .halt = bgp_halt, + .panic = bgp_panic, + .calibrate_decr = bgp_calibrate_decr, + .progress = udbg_progress, +}; diff --git a/arch/powerpc/platforms/44x/bgp_bic.c b/arch/powerpc/platforms/44x/bgp_bic.c new file mode 100644 index 00000000000000..1d5e029b297259 --- /dev/null +++ b/arch/powerpc/platforms/44x/bgp_bic.c @@ -0,0 +1,675 @@ +/* + * Blue Gene/P interrupt controller + * + * Linux wants IRQs mapped to a small integer space. + * + * The bic defines 15 groups and 32 group interrupts in each group. + * We encode an IRQ number like this (which requires NR_IRQS=512): + * GGGGIIIII + * where GGGG is the 4-bit group number+1 (i.e. GGGG=0000 is not used), + * and IIIII is the 5-bit interrupt index within the 32-bit word. + * The interrupt indexes are numbered from the left bit (powerpc-style). + * We avoid encoding GGGG=0000 so we never end up with an IRO=0 which is a + * flag for "no interrupt" in arch/powerpc. + * + * The IPIs subdivide the group 0 interrupt word as follows: + * + * CRSD CRSD CRSD CRSD .... .... .... .... + * 0 4 8 12 16 20 24 28 + * cpu0 cpu1 cpu2 cpu3 + + * where C=call, R=resched, S=call-single, D=debug, and .=unused + * + * We encode IPI IRQ numbers specially. By the above encoding they would be + * 32..47 for these 16 bits. + * + * The other 16 bits in group 0 are treated normally. These will translate to + * IRQ = 48..63 and can be used by software to simulate hardware interrupts for + * other purposes. + * + * + * Todd Inglett <tinglett@us.ibm.com> + * Copyright 2003-2009 International Business Machines, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <asm/bluegene.h> + +/* #define TJCW_USE_BYTEWISE */ +/* #define BIC_DIAGNOSE 1 */ + +#if defined(BIC_DIAGNOSE) +extern int bgp_dma_tcp_tracemask ; +static int bic_diagnose_count ; +enum { + k_bic_diagnose_limit = 100 +}; +static unsigned int bic_diagnosing(void) +{ + if( 0 == (bgp_dma_tcp_tracemask & 0x80000000) ) + { + if( bic_diagnose_count < k_bic_diagnose_limit) + { + bic_diagnose_count += 1 ; + return 1 ; + } + } + else + { + bic_diagnose_count = 0 ; + } + return 0 ; +} +#define BIC_DIAG(X) if(bic_diagnosing()) { X ; } +#else +#define BIC_DIAG(X) +#endif + +void bic_unmask_irq(unsigned int irq); +EXPORT_SYMBOL(bic_unmask_irq) ; +static void bic_mask_irq(unsigned int irq); +static void bic_eoi_irq(unsigned int irq); + +static void bic_unmask_irq_bytewise(unsigned int irq) __attribute__((unused)) ; + +static void bic_mask_irq_bytewise(unsigned int irq) __attribute__((unused)) ; +#if defined(TJCW_USE_BYTEWISE) +static struct irq_chip bgp_irq_chip = { + .name = "BIC", + .unmask = bic_unmask_irq_bytewise, + .mask = bic_mask_irq_bytewise, + .eoi = bic_eoi_irq, +}; +#else +static struct irq_chip bgp_irq_chip = { + .name = "BIC", + .unmask = bic_unmask_irq, + .mask = bic_mask_irq, + .eoi = bic_eoi_irq, +}; +#endif + + +/* Note that the BIC (and other devices) are at phys addresses > 4GB */ +#define BIC_PHYS 0x730000000LL + +/* These are defined by the hardware. 
*/ +#define NR_BIC_GROUPS 15 +#define NR_BIC_GINTS 32 +#define NR_BIC_CPUS 4 + +/* 4-bit target value for target register */ +#define BIC_TARGET_MASK (0xf) +#define BIC_TARGET_TYPE_NORMAL (1<<2) +#define BIC_TARGET_NORMAL(cpu) (BIC_TARGET_TYPE_NORMAL|(cpu)) +#define BIC_DEFAULT_CPU 0 +#define BIC_IPI_GROUP 0 + +/* Define the layout of each group's registers. + * This layout should be 0x80 bytes long (including pad). + */ +struct bic_group_regs { + uint32_t status; /* 0x00 RW */ + uint32_t rd_clr_status; /* 0x04 RO */ + uint32_t status_clr; /* 0x08 WO */ + uint32_t status_set; /* 0x0c WO */ + uint32_t target[4]; /* 0x10 RW */ + uint32_t normal[NR_BIC_CPUS]; /* 0x20 RW */ + uint32_t critical[NR_BIC_CPUS]; /* 0x30 RW */ + uint32_t mcheck[NR_BIC_CPUS]; /* 0x40 RW */ + uint32_t _pad[12]; /* 0x50 */ +}; + +/* Define the layout of the interrupt controller mem mapped regs. */ +struct bic_regs { + struct bic_group_regs group[NR_BIC_GROUPS]; /* 0x000 */ + uint32_t hier_normal[NR_BIC_CPUS]; /* 0x780 */ + uint32_t hier_critical[NR_BIC_CPUS]; /* 0x790 */ + uint32_t hier_mcheck[NR_BIC_CPUS]; /* 0x7a0 */ +}; + +/* This table is indexed by 'real' IRQ, i.e. BIC values. 
Linux 'virtual' IRQs are +32 */ +static volatile unsigned char intended_cpu_for_irq[NR_BIC_GROUPS*NR_BIC_GINTS] = + { +/* 0 */ + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1), + BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2), + BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3), + + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), +/* 32 */ + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), +/* 64 */ + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + 
BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), +/* 128 */ + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + 
BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), +/* 256 */ + 
BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + 
BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + 
BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0), + BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0) +/* 480 */ + + }; + +static inline void out_be8(unsigned char * target, unsigned int val) +{ + *target = val ; +} + +static inline unsigned int in_be8(unsigned char * target) +{ + return *target ; +} + +/* Group is encoded in the upper 4 bits. We account for group+1. */ +static inline unsigned bic_irq_to_hwgroup(unsigned irq) +{ + return ((irq >> 5) & 0xf) - 1; +} +/* Gint is encoded in the bottom 5 bits. 
*/ +static inline unsigned bic_irq_to_hwgint(unsigned irq) +{ + return irq & 0x1f; +} + +static inline unsigned bic_irq_to_hwirq(unsigned irq) +{ + return irq - (1 << 5); +} + +/* bic_hw_to_irq(unsigned group, unsigned gint) is in bluegene.h */ +/* Need to keep a track in memory of where each interrupt is pointed at + * so we can reassemble the right hardware register contents even with SMP behaviour + */ +static volatile unsigned char cpu_for_irq[NR_BIC_GROUPS*NR_BIC_GINTS] ; +static void set_cpu_for_hwirq(unsigned int hwirq, unsigned int tcpu) + { + cpu_for_irq[hwirq] = tcpu ; + } + +void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) + { + unsigned int hwirq=bic_irq_to_hwirq(irq) ; + if( irq < NR_BIC_GROUPS*NR_BIC_GINTS ) + { + intended_cpu_for_irq[hwirq] = BIC_TARGET_NORMAL(cpu) ; + } + BIC_DIAG(printk(KERN_INFO "bic_set_cpu_for_irq irq=0x%02x cpu=%d hwirq=0x%02x\n", + irq,cpu,hwirq)) ; + } + +/* Stop the BIC from passing an interrupt to the CPU. The idea is to */ +/* call this in a FLIH if you don't want a 'reinterrupt', and call */ +/* 'bic_set_cpu_for_irq' later on (e.g. from a NAPI 'poll') */ +void bic_disable_irq(unsigned int irq) + { + if( irq < NR_BIC_GROUPS*NR_BIC_GINTS ) + { + intended_cpu_for_irq[bic_irq_to_hwirq(irq)] = 0 ; + } + } + +EXPORT_SYMBOL(bic_disable_irq) ; + +int bic_get_cpu_for_irq(unsigned int irq) + { + return intended_cpu_for_irq[bic_irq_to_hwirq(irq)] ; + } + + +struct bic { + spinlock_t mask_lock; /* could be finer grained if necessary */ + struct bic_regs *regs; + uint32_t enabled_mask[NR_BIC_GROUPS] ; /* Hardware can report status even if a bit doesn't cause interrupt. This to mask off ... */ +} bic; + + +/* ipi_to_irq(cpu, msg) + * Produce a Linux IRQ number given a cpu+func. + * The caller ensures cpu in 0..3 and func in 0..3. 
+ */ +static inline unsigned ipi_to_irq(unsigned cpu, unsigned func) +{ + return bic_hw_to_irq(BIC_IPI_GROUP, (cpu<<2)+func); +} +/* Generate a 4-bit IPI range mask for this cpu retaining the unused bits. */ +static inline unsigned ipi_mask(unsigned cpu) +{ + return 0xf0000000U >> (cpu << 2) | 0x0000ffffU; +} +/* Given an gint we know is an IPI (0..15), return the cpu that + * should be targeted. Remember these bits are numbered from the left. + */ +static inline unsigned ipi_gint_cpu(unsigned gint) +{ + return (gint >> 2) & 0x3; +} +static inline int is_ipi(unsigned group, unsigned gint) +{ + return (group == 0) && (gint < 16); +} + +#define GINT_TO_IRQ(group, gint) (((group) << 5) | (gint)) +static unsigned int get_tcpu_for_tnum(unsigned int group, unsigned int tnum) + { + unsigned int rbase = GINT_TO_IRQ(group,(tnum<<3)) ; + unsigned int t0 = cpu_for_irq[rbase+0] ; + unsigned int t1 = cpu_for_irq[rbase+1] ; + unsigned int t2 = cpu_for_irq[rbase+2] ; + unsigned int t3 = cpu_for_irq[rbase+3] ; + unsigned int t4 = cpu_for_irq[rbase+4] ; + unsigned int t5 = cpu_for_irq[rbase+5] ; + unsigned int t6 = cpu_for_irq[rbase+6] ; + unsigned int t7 = cpu_for_irq[rbase+7] ; + return ((t0 & 0x0f) << 28) | + ((t1 & 0x0f) << 24) | + ((t2 & 0x0f) << 20) | + ((t3 & 0x0f) << 16) | + ((t4 & 0x0f) << 12) | + ((t5 & 0x0f) << 8) | + ((t6 & 0x0f) << 4) | + ((t7 & 0x0f)) ; + + } +static unsigned int get_tcpu_for_tnum_byte(unsigned int group, unsigned int tnum) + { + unsigned int rbase = GINT_TO_IRQ(group,(tnum<<1)) ; + unsigned int t0 = cpu_for_irq[rbase+0] ; + unsigned int t1 = cpu_for_irq[rbase+1] ; + return ((t0 & 0x0f) << 4) | + ((t1 & 0x0f)) ; + + } +/* + * Unmasking an IRQ will enable it. + * We reach into the bic to set the target core of the interrupt appropriately. + * For now, interrupts are wired to a default core, although IPIs (of course) + * must be directed appropriately. 
+ */ +void bic_unmask_irq(unsigned int irq) +{ + unsigned group = bic_irq_to_hwgroup(irq); + unsigned gint = bic_irq_to_hwgint(irq); + unsigned tnum = gint >> 3; + unsigned tidx = gint & 7; +/* unsigned orig, tmask, tcpu; */ + unsigned tmask, tcpu; + uint32_t *targetp = &bic.regs->group[group].target[tnum]; + unsigned cpu; + unsigned int request_tcpu ; + unsigned int verify_tcpu ; + + spin_lock(&bic.mask_lock); + bic.enabled_mask[group] |= 0x80000000 >> gint ; /* Note that this interrupt is enabled */ + spin_unlock(&bic.mask_lock); + + tmask= ~(0xf << (7-tidx)*4); + + if (group == 0 /*is_ipi(group, gint)*/) { + /* These bits are magic. We know they are for IPIs + * and must direct them to the correct core. + */ + cpu = ipi_gint_cpu(gint); + tcpu = BIC_TARGET_NORMAL(cpu) << (7-tidx)*4; + } else { + cpu = BIC_DEFAULT_CPU; + tcpu = BIC_TARGET_NORMAL(cpu) << (7-tidx)*4; + } + + + { + unsigned int hwirq = bic_irq_to_hwirq(irq) ; + unsigned int tgtcpu=intended_cpu_for_irq[hwirq] ; /* Note .. 'cpu' has the b'0100' bit set already if appropriate */ + set_cpu_for_hwirq(hwirq,tgtcpu) ; + request_tcpu=get_tcpu_for_tnum(group,tnum) ; +/* BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d targtval=0x%08x request_tcpy=0x%08x\n", */ +/* irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu,(orig & tmask)|tcpu, request_tcpu)) ; */ + BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d request_tcpy=0x%08x\n", + irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu, request_tcpu)) ; + + out_be32(targetp, request_tcpu) ; + verify_tcpu=get_tcpu_for_tnum(group,tnum) ; + while(request_tcpu != verify_tcpu) + { + /* If another CPU changed the target for an interrupt while we were writing, pick up the change */ + /* and set the hw register appropriately. 
Eventually the last writer should reflect what */ + /* everyone wants. */ + request_tcpu = verify_tcpu ; + printk(KERN_NOTICE "irq=0x%02x set=%x redo request_tcpu=%08x\n", irq,BIC_TARGET_NORMAL(cpu),request_tcpu) ; + out_be32(targetp, request_tcpu) ; + verify_tcpu=get_tcpu_for_tnum(group,tnum) ; + } + + } + +} +static void bic_unmask_irq_bytewise(unsigned int irq) +{ + unsigned group = bic_irq_to_hwgroup(irq); + unsigned gint = bic_irq_to_hwgint(irq); + unsigned tnum = gint >> 1; + unsigned tidx = gint & 1; +/* unsigned orig, tmask, tcpu; */ + unsigned tmask; + unsigned char *basep = (unsigned char *)(bic.regs->group[group].target) ; + unsigned char *targetp = basep+tnum ; + unsigned cpu; + unsigned int request_tcpu ; + unsigned int verify_tcpu ; + + spin_lock(&bic.mask_lock); + bic.enabled_mask[group] |= 0x80000000 >> gint ; /* Note that this interrupt is enabled */ + spin_unlock(&bic.mask_lock); + + tmask= ~(0xf << (1-tidx)*4); + + if (group == 0 /*is_ipi(group, gint)*/) { + /* These bits are magic. We know they are for IPIs + * and must direct them to the correct core. + */ + cpu = ipi_gint_cpu(gint); + } else { + cpu = BIC_DEFAULT_CPU; + } + + + { + unsigned int hwirq = bic_irq_to_hwirq(irq) ; + unsigned int tgtcpu=intended_cpu_for_irq[hwirq] ; /* Note .. 
'cpu' has the b'0100' bit set already if appropriate */ + set_cpu_for_hwirq(hwirq,tgtcpu) ; + request_tcpu=get_tcpu_for_tnum_byte(group,tnum) ; +/* BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d targtval=0x%08x request_tcpy=0x%08x\n", */ +/* irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu,(orig & tmask)|tcpu, request_tcpu)) ; */ + BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d request_tcpy=0x%08x\n", + irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu, request_tcpu)) ; + + out_be8(targetp, request_tcpu) ; + verify_tcpu=get_tcpu_for_tnum_byte(group,tnum) ; + while(request_tcpu != verify_tcpu) + { + /* If another CPU changed the target for an interrupt while we were writing, pick up the change */ + /* and set the hw register appropriately. Eventually the last writer should reflect what */ + /* everyone wants. */ + request_tcpu = verify_tcpu ; + printk(KERN_NOTICE "irq=0x%02x set=%x redo request_tcpu=%08x\n", irq,BIC_TARGET_NORMAL(cpu),request_tcpu) ; + out_be8(targetp, request_tcpu) ; + verify_tcpu=get_tcpu_for_tnum_byte(group,tnum) ; + } + + } + +} + +/* + * Masking an IRQ will disable it. + * We do this by changing the target to disable. 
This works for IPI bits, + */ +static void bic_mask_irq(unsigned int irq) +{ + unsigned group = bic_irq_to_hwgroup(irq); + unsigned gint = bic_irq_to_hwgint(irq); + unsigned tnum = gint >> 3; + unsigned tidx = gint & 7; + unsigned orig, tmask; + uint32_t *targetp = &bic.regs->group[group].target[tnum]; + + tmask = BIC_TARGET_MASK << (7-tidx)*4; + BIC_DIAG(printk(KERN_INFO "bic_mask_irq irq=0x%02x group=0x%02x gint=0x%02x tmask=0x%02x\n", + irq,group,gint,tmask)) ; + spin_lock(&bic.mask_lock); + bic.enabled_mask[group] &= 0xffffffff ^ (0x80000000 >> gint) ; /* Note that this interrupt is disabled */ + orig = in_be32(targetp); + out_be32(targetp, orig & ~tmask); + spin_unlock(&bic.mask_lock); +} + +static void bic_mask_irq_bytewise(unsigned int irq) +{ + unsigned int hwirq = bic_irq_to_hwirq(irq) ; + unsigned group = bic_irq_to_hwgroup(irq); + unsigned gint = bic_irq_to_hwgint(irq); + unsigned tnum = gint >> 1; + unsigned tidx = gint & 1; + unsigned orig, tmask; + unsigned char *basep = (unsigned char *)(bic.regs->group[group].target) ; + unsigned char *targetp = basep+tnum ; + + set_cpu_for_hwirq(hwirq,0) ; + tmask = BIC_TARGET_MASK << ((1-tidx)*4); + BIC_DIAG(printk(KERN_INFO "bic_mask_irq irq=0x%02x group=0x%02x gint=0x%02x tmask=0x%02x\n", + irq,group,gint,tmask)) ; + spin_lock(&bic.mask_lock); + bic.enabled_mask[group] &= 0xffffffff ^ (0x80000000 >> gint) ; /* Note that this interrupt is disabled */ + orig = in_be8(targetp); + out_be8(targetp, orig & ~tmask); + spin_unlock(&bic.mask_lock); +} + +/* + * End an interrupt. We just need to write the bit to be cleared + * and the hardware handles it. No locking needed. 
+ */ +static void bic_eoi_irq(unsigned int irq) +{ + unsigned group = bic_irq_to_hwgroup(irq); + unsigned gint = bic_irq_to_hwgint(irq); + uint32_t gintbits = 1 << (31 - gint); +/* BIC_DIAG(printk(KERN_INFO "bic_eoi_irq irq=0x%02x group=0x%02x gint=0x%02x \n",irq,group,gint)) ; */ + + out_be32(&bic.regs->group[group].status_clr, gintbits); + mb(); +} + +/* Return the hardware cpu index as needed by the bic. + * Currently this matches smp_processor_id(), but we do this explicitly + * in case we ever want to virtualize the processor id. + */ +static inline unsigned this_cpu(void) +{ + unsigned cpu; + asm volatile("mfspr %0, 0x11e" : "=r" (cpu)); + return cpu; +} + +/* Return 0..32 counting from the left (same as bic). 32=> no bit set. + * Could use bitops.h as long as it always matches the bic. + */ +static inline unsigned bic_find_first_bit(unsigned x) +{ + unsigned lz; + asm("cntlzw %0,%1" : "=r" (lz) : "r" (x)); + return lz; +} + +/* + * Get an IRQ from the BIC. + * We analyze the normal hierarchy register to find which group has caused an + * interrupt. Similarily, we find the first bit within a group to find the first + * source of interrupt. This artificially prioritizes interrupts. + * + * We handle IPIs specially. This core can see IPI bits which did not actually + * interrupt this core. We mask off those bits and otherwise process normally. + */ +unsigned int bgp_get_irq(void) +{ + unsigned thiscpu = this_cpu(); + unsigned nhier, group, gint; + uint32_t gintbits; + int irq = NO_IRQ; + + nhier = in_be32(&(bic.regs->hier_normal[thiscpu])); + group = bic_find_first_bit(nhier); + if (group >= NR_BIC_GROUPS) + goto out; + { + gintbits = in_be32(&bic.regs->group[group].status) & bic.enabled_mask[group] ; + if (group == BIC_IPI_GROUP) { + /* This may be an IPI. Mask out other cpu IPI bits so we don't try + * to handle it on this core! We don't mask the other 16 bits. 
+ */ + unsigned mask = ipi_mask(thiscpu); + gintbits &= mask; + } + gint = bic_find_first_bit(gintbits); + } + if (gint >= NR_BIC_GINTS) + goto out; + irq = bic_hw_to_irq(group, gint); +out: +/* BIC_DIAG(printk(KERN_INFO "bgp_get_irq nhier=0x%02x group=0x%02x gintbits=0x%08x gint=0x%02x irq=0x%02x\n", */ +/* nhier,group,gintbits,gint,irq)) ; */ + return irq; +} + +#ifdef CONFIG_SMP +/* + * Send an IPI to another cpu. + * This could be coded to send to a cpu mask. + */ +enum { + k_spinlimit = 1000000 , + k_reportlimit = 100 +}; +static unsigned int reportcount ; +void bgp_send_ipi(int cpu, int msg) +{ + unsigned group = BIC_IPI_GROUP; + unsigned gint = ipi_to_irq(cpu, msg) & 0x1f; + uint32_t gintbits = 1 << (31 - gint); + uint32_t ngintbits; + unsigned int spincount = 0 ; + + /* If this interrupt is already raised we must wait for it to complete else + * we might race with the ack by the other waiting cpu. + * Once it is clear there is no guarantee another cpu won't take it in tandem + * with this cpu. Currently that is ok, because a reschedule race is harmless + * as the goal of rescheduling is met, and the others hold a lock while the + * operation is in progress. Why doesn't the lock protect us? There is a window + * between the lock release and the IPI interrupt ack where we will race. + * This plugs the race. It may be better to reallocate the IPI bits for unique + * core-to-core combinations. + */ + do { + spincount += 1 ; + ngintbits = in_be32(&bic.regs->group[group].status); + } while ( (ngintbits & gintbits) && (spincount < k_spinlimit) ) ; + + /* Pull the interrupt. */ + if( spincount < k_spinlimit) + { + out_be32(&bic.regs->group[group].status_set, gintbits); + } + else + { + if(reportcount < k_reportlimit) + { + printk(KERN_WARNING "bgp_send_ipi cpu=%d msg=%d stuck\n", cpu, msg) ; + reportcount += 1; + } + } +} + +/* Initialize an IPI handler. This is only here to use ipi_to_irq(), which + * could be exposed in bluegene.h. 
+ */ +void bgp_init_IPI(int cpu, int msg) +{ + smp_request_message_ipi(ipi_to_irq(cpu, msg), msg); +} +#endif + +/* Initialize the bic. + * We set the handlers as percpu because bic interrupts are wired + * to specific cores (we never broadcast to all cores). + */ +static void __init disable_all_bic_interrupts(void) +{ + int group ; + struct bic_regs * regs = bic.regs ; + for(group=0; group<NR_BIC_GROUPS; group += 1) + { + struct bic_group_regs *group_regs = regs->group+group ; + group_regs->target[0] = 0 ; + group_regs->target[1] = 0 ; + group_regs->target[2] = 0 ; + group_regs->target[3] = 0 ; + bic.enabled_mask[group] = 0 ; + } +} +void __init bgp_init_IRQ(void) +{ + int irq; + + bic.regs = ioremap(BIC_PHYS, sizeof(*bic.regs)); + disable_all_bic_interrupts() ; + bic.mask_lock = SPIN_LOCK_UNLOCKED; + for_each_irq(irq) { + /* Interrupts from the BIC are percpu (we don't use broadcast) + * so we may as well take the cycle advantage and declare it. + */ + set_irq_chip_and_handler(irq, &bgp_irq_chip, handle_percpu_irq); + } +} + +EXPORT_SYMBOL(bic) ; +EXPORT_SYMBOL(bic_set_cpu_for_irq) ; diff --git a/arch/powerpc/platforms/44x/bgp_cns.c b/arch/powerpc/platforms/44x/bgp_cns.c new file mode 100644 index 00000000000000..1cccd1962589c9 --- /dev/null +++ b/arch/powerpc/platforms/44x/bgp_cns.c @@ -0,0 +1,244 @@ +/* + * Blue Gene/P Common Node Services (CNS) wrappers + * + * These are declared in asm/bluegene.h but implemented here. + * + * Copyright 2003-2009 International Business Machines, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * Author: Todd Inglett <tinglett@us.ibm.com> + */ + +#include <linux/init.h> +#include <linux/of_platform.h> +#include <asm/pgtable.h> +#include <asm/bluegene.h> +#include <asm/bgcns.h> + +/* The descriptor for CNS identifies location and entry point of firmware. + * We re-build it from data passed through the ibm,bluegene-cns device tree entry. + */ +BGCNS_Descriptor bgcnsd; + +/* These functions spin on specific errors when we can't print messages. + * They make it easy to find the cause of the error by finding the iar in the + * kernel System.map. + */ +static void noinline __init bgp_fatal_no_ibm_bluegene_cns(void) { for (;;); } +static void noinline __init bgp_fatal_no_base_va(void) { for (;;); } +static void noinline __init bgp_fatal_no_base_pa(void) { for (;;); } +static void noinline __init bgp_fatal_no_services(void) { for (;;); } +static void noinline __init bgp_fatal_no_size(void) { for (;;); } +static void noinline __init bgp_fatal_no_version(void) { for (;;); } + +/* Get the descriptor for CNS from the device tree. + * Don't inline so we can make out the stack trace easier when it isn't working. 
+ */ +static void noinline __init get_cns_descriptor(BGCNS_Descriptor *bgcnsd) +{ + int len; + const unsigned *reg; + struct device_node *devcns = of_find_node_by_path("/ibm,bluegene/cns"); + + if (!devcns) bgp_fatal_no_ibm_bluegene_cns(); + + reg = of_get_property(devcns, "base-va", &len); + if (!reg) bgp_fatal_no_base_va(); + bgcnsd->baseVirtualAddress = *reg; + reg = of_get_property(devcns, "base-pa", &len); + if (!reg) bgp_fatal_no_base_pa(); + bgcnsd->basePhysicalAddress = *reg; + bgcnsd->basePhysicalAddressERPN = 0; /* assumes DDR <= 4G */ + reg = of_get_property(devcns, "services", &len); + if (!reg) bgp_fatal_no_services(); + bgcnsd->services = (void *)(*reg); + reg = of_get_property(devcns, "size", &len); + if (!reg) bgp_fatal_no_size(); + bgcnsd->size = *reg; + reg = of_get_property(devcns, "version", &len); + if (!reg) bgp_fatal_no_version(); + bgcnsd->version = *reg; +} + +void __init ppc44x_update_tlb_hwater(void); /* from mm/44x_mmu.c */ + +static void noinline __init map_cns(BGCNS_Descriptor *bgcnsd) +{ + unsigned word0, word1, word2; + int entry = 62; /* We reserve one of the PPC44x_EARLY_TLBS in asm/mmu-44x.h */ + + word0 = (bgcnsd->baseVirtualAddress & 0xfffff000) | PPC44x_TLB_VALID | PPC44x_TLB_256K; + word1 = (bgcnsd->basePhysicalAddress & 0xfffff000) | (bgcnsd->basePhysicalAddressERPN & 0xf); + word2 = PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M | PPC44x_TLB_WL1 | PPC44x_TLB_U2; + __asm__ __volatile__( + "tlbwe %1,%0,0\n" + "tlbwe %2,%0,1\n" + "tlbwe %3,%0,2\n" + "isync\n" : : "r" (entry), "r" (word0), "r" (word1), "r" (word2)); +} + +extern int map_page(unsigned long va, phys_addr_t pa, int flags); + +void __init bgp_init_cns(void) +{ + unsigned long v_start, v_end, v, p; + + if (bgcnsd.size == 0) { + /* Get the descriptor, map CNS, and tell Linux about the mapping. 
*/ + get_cns_descriptor(&bgcnsd); + v_start = bgcnsd.baseVirtualAddress; + v_end = v_start + bgcnsd.size; + v_start -= PAGE_SIZE; /* hack: reserve 1 extra page */ + v = v_start; + p = bgcnsd.basePhysicalAddress; /* always < 4G */ + /* We must be careful because we could hit 4G and wrap to v == 0. + * Hence the v > v_start check. + */ + for (; v < v_end && v > v_start; v += PAGE_SIZE, p += PAGE_SIZE) + map_page(v, p, _PAGE_RAM_TEXT); + } + map_cns(&bgcnsd); +} + +/* Simple udbg_putc. We perform rudimentary buffering so it is readable. */ +static int bgp_udbg_cur = 0; +static char bgp_udbg_buf[256]; +void bgp_udbg_putc(char c) +{ + bgp_udbg_buf[bgp_udbg_cur++] = c; + if (c == '\n' || bgp_udbg_cur >= sizeof(bgp_udbg_buf)) { + if (bgcnsd.size) + bluegene_writeToMailboxConsole(bgp_udbg_buf, bgp_udbg_cur); + bgp_udbg_cur = 0; + } +} + + +#define CALLCNS(service) \ + ({ unsigned flags; \ + typeof(bgcnsd.services->service) ret; \ + local_save_flags(flags); \ + local_irq_disable(); \ + ret = bgcnsd.services->service; \ + local_irq_restore(flags); \ + ret; \ + }) + + +/* This returns non-zero if there is something in an input mailbox. */ +int bluegene_testInboxAttention(void) +{ + /* ToDo: this should be fast. Read the DCR directly. 
*/ + return CALLCNS(testInboxAttention()); +} + +int bluegene_testForOutboxCompletion(void) +{ + return CALLCNS(testForOutboxCompletion()); +} + +int bluegene_writeRASEvent_nonBlocking(unsigned facility, + unsigned unit, + unsigned short err_code, + unsigned numDetails, + unsigned details[]) +{ + return CALLCNS(writeRASEvent_nonBlocking(facility, unit, err_code, numDetails, details)); +} + +int bluegene_writeRASString(unsigned facility, + unsigned unit, + unsigned short err_code, + char* str) +{ + return CALLCNS(writeRASString(facility, unit, err_code, str)); +} + +int bluegene_writeRASString_nonBlocking(unsigned facility, + unsigned unit, + unsigned short err_code, + char* str) +{ + return CALLCNS(writeRASString_nonBlocking(facility, unit, err_code, str)); +} + +int bluegene_writeToMailboxConsole(char *msg, unsigned msglen) +{ + return CALLCNS(writeToMailboxConsole(msg, msglen)); +} + +int bluegene_writeToMailboxConsole_nonBlocking(char *msg, unsigned msglen) +{ + return CALLCNS(writeToMailboxConsole_nonBlocking(msg, msglen)); +} + +unsigned bluegene_readFromMailboxConsole(char *buf, unsigned bufsize) +{ + return CALLCNS(readFromMailboxConsole(buf, bufsize)); +} + +int bluegene_macResetPHY(void) +{ + return CALLCNS(macResetPHY()); +} + /* ! @brief Tests the MAC unit's link but does not block. */ + /* ! @param[in] link_type specifies the type of link to be tested. */ + /* ! @param[out] result points to the link status, which is valid only when the return code is */ + /* ! BGCNS_RC_COMPLETE. A value of one (1) indicates that the link is active; zero (0) */ + /* ! indicates that it is inactive. */ + /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */ + /* ! test link sequence. That is, callers should initiate a sequence with reset=1 and then */ + /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */ + /* ! reset=0. */ + /* ! 
@param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */ + /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */ + /* ! to indicate that it needs additional time. */ + /* ! @return BGCNS_RC_COMPLETE if the test is complete (result is valid only in this case). BGCNS_RC_CONTINUE */ + /* ! if the reset operation is not yet complete. BGCNS_RC_ERROR if the reset operation failed. */ + int (*macTestLink_nonBlocking)(BGCNS_LinkType link_type, unsigned* result, int reset, unsigned timeoutInMillis); + + +int bluegene_macTestRxLink(void) +{ + return CALLCNS(macTestLink(BGCNS_Receiver)); +} + + +int bluegene_macTestTxLink(void) +{ + return CALLCNS(macTestLink(BGCNS_Transmitter)); +} + +int bluegene_takeCPU(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg)) +{ + return CALLCNS(takeCPU(cpu, arg, entry)); +} + + +int bluegene_getPersonality(void *buff, unsigned buffSize) +{ + int sz; + unsigned flags; + + local_save_flags(flags); + local_irq_disable(); + sz = bgcnsd.services->getPersonalitySize(); + if (sz > buffSize) + sz = buffSize; + memcpy(buff, bgcnsd.services->getPersonalityData(), sz); + local_irq_restore(flags); + + return sz; +} + +int bluegene_mapXEMAC(void* baseAddr) +{ + return CALLCNS(mapDevice(BGCNS_XEMAC, baseAddr)); +} + +EXPORT_SYMBOL(bluegene_getPersonality) ; +EXPORT_SYMBOL(bgcnsd) ; diff --git a/arch/powerpc/platforms/44x/bgp_pers.c b/arch/powerpc/platforms/44x/bgp_pers.c new file mode 100644 index 00000000000000..431666565b4d8d --- /dev/null +++ b/arch/powerpc/platforms/44x/bgp_pers.c @@ -0,0 +1,345 @@ +/* + * + * Blue Gene personality /proc interface with the control system + * + * Copyright 2003,2005 International Business Machines + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or 
(at your + * option) any later version. + * + * User apps can mmap /proc/personality to directly access the binary + * personality in SRAM (see bglpersonality.h), or they can read + * /proc/personality.sh which expands to shell commands (so it can be sourced) + * + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> + +#include <asm/bluegene.h> +#include <asm/bgp_personality.h> + + +static struct proc_dir_entry *personality_proc_entry = NULL; +static struct proc_dir_entry *personality_sh_proc_entry = NULL; + + +static BGP_Personality_t bgpers; + +/* Binary personality interface. Doesn't need to be fast. */ +static int bgpersonality_read(char *page, char **start, off_t offset, + int count, int *eof, void *data) +{ + bluegene_getPersonality(&bgpers, count); + memcpy(page, &bgpers, count); + *eof = 1; + + return count; +} + + +static void* bgpers_sh_seq_start(struct seq_file* f, + loff_t* pos) +{ + return *pos <= 32 ? (void*) pos : (void*) NULL; +} + + +static void* bgpers_sh_seq_next(struct seq_file* f, + void* v, + loff_t* pos) +{ + return ++(*pos) <= 32 ? (void*) pos : (void*) NULL; +} + + +static void bgpers_sh_seq_stop(struct seq_file* f, + void* v) +{ + return; +} + + +/* Produce a personality in a form parsable by a shell. 
*/ +static int bgpers_sh_seq_show(struct seq_file* f, + void* v) +{ + loff_t offset = *((loff_t*) v); + BGP_UCI_ComputeCard_t* uci; + + bluegene_getPersonality(&bgpers, sizeof(bgpers)); + uci = (BGP_UCI_ComputeCard_t*) &bgpers.Kernel_Config.UniversalComponentIdentifier; + + switch((unsigned long) offset) { + case 0: + seq_printf(f, "BG_UCI=%08x\n", + bgpers.Kernel_Config.UniversalComponentIdentifier); + break; + case 1: + seq_printf(f, "BG_LOCATION=R%1x%1x-M%c-N%02d-J%02d\n", + uci->RackRow, uci->RackColumn, (uci->Midplane ? '1' : '0'), + uci->NodeCard, uci->ComputeCard); + break; + case 2: + seq_printf(f, "BG_MAC=%02x:%02x:%02x:%02x:%02x:%02x\n", + bgpers.Ethernet_Config.EmacID[0], + bgpers.Ethernet_Config.EmacID[1], + bgpers.Ethernet_Config.EmacID[2], + bgpers.Ethernet_Config.EmacID[3], + bgpers.Ethernet_Config.EmacID[4], + bgpers.Ethernet_Config.EmacID[5]); + break; + case 3: + seq_printf(f, "BG_IP=%d.%d.%d.%d\n", + bgpers.Ethernet_Config.IPAddress.octet[12], + bgpers.Ethernet_Config.IPAddress.octet[13], + bgpers.Ethernet_Config.IPAddress.octet[14], + bgpers.Ethernet_Config.IPAddress.octet[15]); + break; + case 4: + seq_printf(f, "BG_NETMASK=%d.%d.%d.%d\n", + bgpers.Ethernet_Config.IPNetmask.octet[12], + bgpers.Ethernet_Config.IPNetmask.octet[13], + bgpers.Ethernet_Config.IPNetmask.octet[14], + bgpers.Ethernet_Config.IPNetmask.octet[15]); + break; + case 5: + seq_printf(f, "BG_BROADCAST=%d.%d.%d.%d\n", + bgpers.Ethernet_Config.IPBroadcast.octet[12], + bgpers.Ethernet_Config.IPBroadcast.octet[13], + bgpers.Ethernet_Config.IPBroadcast.octet[14], + bgpers.Ethernet_Config.IPBroadcast.octet[15]); + break; + case 6: + seq_printf(f, "BG_GATEWAY=%d.%d.%d.%d\n", + bgpers.Ethernet_Config.IPGateway.octet[12], + bgpers.Ethernet_Config.IPGateway.octet[13], + bgpers.Ethernet_Config.IPGateway.octet[14], + bgpers.Ethernet_Config.IPGateway.octet[15]); + break; + case 7: + seq_printf(f, "BG_MTU=%d\n", bgpers.Ethernet_Config.MTU); + break; + case 8: + seq_printf(f, 
"BG_FS=%d.%d.%d.%d\n", + bgpers.Ethernet_Config.NFSServer.octet[12], + bgpers.Ethernet_Config.NFSServer.octet[13], + bgpers.Ethernet_Config.NFSServer.octet[14], + bgpers.Ethernet_Config.NFSServer.octet[15]); + break; + case 9: + seq_printf(f, "BG_EXPORTDIR=\"%s\"\n", bgpers.Ethernet_Config.NFSExportDir); + break; + case 10: + seq_printf(f, "BG_SIMULATION=%d\n", + (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_Simulation ? 1 : 0)); + break; + case 11: + seq_printf(f, "BG_PSETNUM=%d\n", bgpers.Network_Config.PSetNum); + break; + case 12: + seq_printf(f, "BG_NUMPSETS=%d\n", bgpers.Network_Config.IOnodes); + break; + case 13: + seq_printf(f, "BG_NODESINPSET=%d\n", bgpers.Network_Config.PSetSize); + break; + case 14: + seq_printf(f, "BG_XSIZE=%d\n", bgpers.Network_Config.Xnodes); + break; + case 15: + seq_printf(f, "BG_YSIZE=%d\n", bgpers.Network_Config.Ynodes); + break; + case 16: + seq_printf(f, "BG_ZSIZE=%d\n", bgpers.Network_Config.Znodes); + break; + case 17: + seq_printf(f, "BG_VERBOSE=%d", (bgpers.Kernel_Config.TraceConfig & BGP_TRACE_VERBOSE) ? 1 : 0); + break; + case 18: + switch (bgpers.Network_Config.PSetSize) { + case 16: + seq_printf(f, "BG_PSETSIZE=\"4 2 2\"\n"); + break; + case 32: + seq_printf(f, "BG_PSETSIZE=\"4 4 2\"\n"); + break; + case 64: + seq_printf(f, "BG_PSETSIZE=\"4 4 4\"\n"); + break; + case 128: + seq_printf(f, "BG_PSETSIZE=\"4 4 8\"\n"); + break; + case 256: + seq_printf(f, "BG_PSETSIZE=\"8 4 8\"\n"); + break; + case 512: + seq_printf(f, "BG_PSETSIZE=\"8 8 8\"\n"); + break; + default: + seq_printf(f, "BG_PSETSIZE=\"? ? ?\"\n"); + } + break; + case 19: +/* if (bgpers.Network_Config.RankInPSet) */ +/* // Not an IO node so display pset origin. 
*/ + seq_printf(f, "BG_PSETORG=\"%d %d %d\"\n", + bgpers.Network_Config.Xcoord, + bgpers.Network_Config.Ycoord, + bgpers.Network_Config.Zcoord); + break; + case 20: + seq_printf(f, "BG_CLOCKHZ=%d\n", bgpers.Kernel_Config.FreqMHz); + break; + case 21: + seq_printf(f, "BG_GLINTS=%d\n", + (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_GlobalInts) ? 1 : 0); + break; + case 22: + seq_printf(f, "BG_ISTORUS=\"%s%s%s\"\n", + (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshX) ? "X" : "", + (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshY) ? "Y" : "", + (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshZ) ? "Z" : ""); + break; + case 23: { + char blockID[BGP_PERSONALITY_LEN_NFSDIR+1]; + + strncpy(blockID, bgpers.Ethernet_Config.NFSMountDir, sizeof(blockID)); + blockID[sizeof(blockID)-1] = '\0'; + seq_printf(f, "BG_BLOCKID=\"%s\"\n", blockID); + break; + } + case 24: + seq_printf(f, "BG_SN=%d.%d.%d.%d\n", + bgpers.Ethernet_Config.serviceNode.octet[12], + bgpers.Ethernet_Config.serviceNode.octet[13], + bgpers.Ethernet_Config.serviceNode.octet[14], + bgpers.Ethernet_Config.serviceNode.octet[15]); + break; + case 25: + seq_printf(f, "BG_IS_IO_NODE=%d\n", (bgpers.Network_Config.RankInPSet ? 0 : 1)); + break; + case 26: + seq_printf(f, "BG_RANK_IN_PSET=%d\nBG_RANK=%d\n", + bgpers.Network_Config.RankInPSet, + bgpers.Network_Config.Rank); + break; + case 27: + seq_printf(f, "BG_IP_OVER_COL=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverCollective) ? 1 : 0); + break; + case 28: + seq_printf(f, "BG_IP_OVER_TOR=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverTorus) ? 1 : 0); + break; + case 29: + seq_printf(f, "BG_IP_OVER_COL_VC=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverCollectiveVC) ? 
1 : 0); + break; + case 30: + if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_MuxOnly)) + seq_printf(f, "BG_CIO_MODE=MUX_ONLY\n"); + else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_None)) + seq_printf(f, "BG_CIO_MODE=NONE\n"); + else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full)) + seq_printf(f, "BG_CIO_MODE=FULL\n"); + else + seq_printf(f, "BG_CIO_MODE=UNKNOWN\n"); + break; + case 31: + if ((bgpers.Block_Config & BGP_PERS_BLKCFG_bgsysFSSel(3)) == BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3)) + seq_printf(f, "BG_BGSYS_FS_TYPE=NFSv3\n"); + else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_bgsysFSSel(3)) == BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv4)) + seq_printf(f, "BG_BGSYS_FS_TYPE=NFSv4\n"); + else + seq_printf(f, "BG_BGSYS_FS_TYPE=UNKNOWN\n"); + break; + case 32: + seq_printf(f, "BG_HTC_MODE=%d\n", + (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_HighThroughput) ? 
1 : 0); + break; + default: + seq_printf(f, "Illegal offset %d\n", (unsigned int) offset); + } + + return 0; +} + +void bgpersonality_cleanup_module(void) +{ + if (personality_proc_entry) { + remove_proc_entry(personality_proc_entry->name, NULL); + } + + if (personality_sh_proc_entry) { + remove_proc_entry(personality_sh_proc_entry->name, NULL); + } +} + + + +static struct seq_operations bgpers_sh_seq_ops = { + .start = bgpers_sh_seq_start, + .next = bgpers_sh_seq_next, + .stop = bgpers_sh_seq_stop, + .show = bgpers_sh_seq_show +}; + + + +static int bgpers_sh_proc_open(struct inode* inode, + struct file* f) +{ + return seq_open(f, &bgpers_sh_seq_ops); +} + + +static struct file_operations bgpers_sh_fops = { + .owner = THIS_MODULE, + .open = bgpers_sh_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + + +int bgpersonality_init_module(void) +{ + personality_proc_entry = create_proc_read_entry("personality", 0644, NULL, + bgpersonality_read, (void *) 0); + if (!personality_proc_entry) + goto out; + + personality_sh_proc_entry = create_proc_entry("personality.sh", 0, NULL); + if (!personality_sh_proc_entry) + goto out; + else + personality_sh_proc_entry->proc_fops = &bgpers_sh_fops; + + return 0; + +out: + bgpersonality_cleanup_module(); + + return -ENOMEM; +} + + +module_init(bgpersonality_init_module); +module_exit(bgpersonality_cleanup_module); diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index e868b5c50723d3..928d46ff72d926 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -231,7 +231,7 @@ config VIRT_CPU_ACCOUNTING If in doubt, say Y here. config SMP - depends on PPC_STD_MMU || FSL_BOOKE + depends on PPC_STD_MMU || BOOKE bool "Symmetric multi-processing support" ---help--- This enables support for systems with more than one CPU. 
If you have @@ -259,9 +259,13 @@ config NR_CPUS config NOT_COHERENT_CACHE bool depends on 4xx || 8xx || E200 || PPC_MPC512x + default n if BGP default y config CHECK_CACHE_COHERENCY bool +config L1_WRITETHROUGH + bool + endmenu diff --git a/arch/powerpc/syslib/bgdd/Makefile b/arch/powerpc/syslib/bgdd/Makefile new file mode 100644 index 00000000000000..4ee8cb1bae8231 --- /dev/null +++ b/arch/powerpc/syslib/bgdd/Makefile @@ -0,0 +1,11 @@ +#CFLAGS += -Wa,-m450 + +EXTRA_CFLAGS := -D__LINUX_KERNEL__ -Wno-declaration-after-statement + +obj-$(CONFIG_BGP_DMA) += bgp_dma_spi.o + + +bgp_dma_spi-y := bgp_dma_base.o +bgp_dma_spi-y += spi/DMA_InjFifo.o +bgp_dma_spi-y += spi/DMA_RecFifo.o +bgp_dma_spi-y += spi/DMA_Descriptors.o diff --git a/arch/powerpc/syslib/bgdd/bgp_dma_base.c b/arch/powerpc/syslib/bgdd/bgp_dma_base.c new file mode 100644 index 00000000000000..5703368c524537 --- /dev/null +++ b/arch/powerpc/syslib/bgdd/bgp_dma_base.c @@ -0,0 +1,1284 @@ +/********************************************************************** + * + * Copyright (c) 2007, 2009 International Business Machines + * Chris Ward <tjcw@uk.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + **********************************************************************/ + + +/* ************************************************************************* */ +/* includes */ +/* ************************************************************************* */ + +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/syscalls.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <linux/vmalloc.h> + +#include <linux/hugetlb.h> +/* #include <asm/bluegene.h> */ + +#include <asm/bgcns.h> + +#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) +#define TORNIC_TORUS_AFFINITY +#endif + +/* int bgp_dma_irq ; */ +#if defined(TORNIC_TORUS_AFFINITY) +void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ; +enum { + k_TorusAffinityCPU = 2 +}; +#endif + +#define TRACE(x) printk x + +#define CHECK_RET(x) if (x) { TRACE((KERN_INFO \ + "bgpdma: Return due error at line %d\n",\ + __LINE__)); \ + return ret; } + +#undef CHECK_PARAM +#define CHECK_PARAM(x) if (!(x)) { printk( KERN_INFO \ + "(E) bgpdma: Assertion failed in %s:%d\n", \ + __FILE__,__LINE__); \ + return -EINVAL; } +#undef HPC_MODE +/* #define HPC_MODE */ + + +/* ************************************************************************* */ +/* Include firmware */ +/* ************************************************************************* */ + +/* ************************************************************************* */ +/* Defines and friends required by DMA SPI in kernel mode */ +/* 
************************************************************************* */ + +#include <spi/linux_kernel_spi.h> + +/* #include "bgp_bic_diagnosis.h" */ +/* ************************************************************************* */ +/* IOCTL commands */ +/* ************************************************************************* */ + +/* size of mmap'ed IO memory */ +#define BGP_DMA_MMAP_SIZE (4096 * 4) +/* */ +///* ************************************************************************* */ +///* network device structures */ +///* ************************************************************************* */ +/* */ +struct bgpdma_state_t +{ + uint32_t inj_counters[4]; /* for each group, a bit mask of which injection counter subgroups allocated */ + /* bits 0 - 7 are valid, 8 subgroups of 8 counters/subgroup */ + uint32_t rec_counters[4]; /* for each group, a bit mask of which reception counter subgroups allocated */ + /* bits 0 - 7 are valid, 8 subgroups of 8 counters/subgroup */ + uint32_t inj_fifos[4]; /* for each group, a bit mask of which injection fifos have been allocated */ + /* bits 0 - 31 are valid */ + + uint32_t rec_fifo_set_map; /* if 1, _bgp_DMA_RecFifoSetMap has already been called */ + + uint32_t rec_fifo_init[2]; /* set bit to 1 if receive fifo has already been intialized, */ + /* bits 0-31 of rec_fifo_init[0] for normal fifos */ + /* bits 0-3 of rec_fifo_init[1] for header fifos */ +}; + +/* max number of registered interrupt handlers */ +#define MAX_NUM_IRQ 4 + + +/* interrupt info sctructure */ +struct dma_irq +{ + int irq; /* irq number for this group */ + /* (fixed at module init time) */ + Kernel_CommThreadHandler func; + u32 arg1; +}; + + + +struct bgpdma_dev_t +{ + unsigned long long pa_addr; /* physical address */ + struct bgpdma_state_t state; /* dma resource state */ + struct dma_irq irqInfo[ MAX_NUM_IRQ ]; /* dma interrupts */ +}; +/* */ +static struct bgpdma_dev_t bgpdma_dev; + +/* 
************************************************************************* */ +/* Linux module header */ +/* ************************************************************************* */ + +MODULE_DESCRIPTION("BG/P DMA driver"); +MODULE_LICENSE("GPL"); + +#define BGP_DMA_NAME "bgpdma" + +/* Threshold crossed irq number for rec fifo groups */ +#define DMA_RECFIFO_THRESHOLD_IRQ(group) ((_BGP_IC_DMA_NFT_G2_HIER_POS<<5)|(28+group)) +#define DMA_RECFIFO_THRESHOLD_IRQ_GINT(group) (28+group) + +/* Threshold crossed irq number for rec fifo groups */ +#define TORUS_RECFIFO_WATERMARK_IRQ(fifo) ((_BGP_IC_DMA_NFT_G2_HIER_POS<<5)|(8+fifo)) +#define TORUS_RECFIFO_WATERMARK_IRQ_GINT(fifo) (8+fifo) + +/* ************************************************************************* */ +/* module initialization/cleanup */ +/* ************************************************************************* */ + +static int __init + bgpdma_module_init (void); +static void __exit + bgpdma_module_cleanup (void); + +extern BGCNS_Descriptor bgcnsd; + +module_init(bgpdma_module_init); +module_exit(bgpdma_module_cleanup); + +/* ************************************************************************* */ +/* BG/P DMA initialization */ +/* ************************************************************************* */ + +/* dma physical address */ +#define _BGP_UA_DMA (0x6) +#define _BGP_PA_DMA (0x00000000) + +/* virtual kernel based address of DMA */ +void * bgpdma_kaddr; +EXPORT_SYMBOL(bgpdma_kaddr); + + +/* check if DMA is mapped by the kernel */ +#define CHECK_DMA_ACCESS if ( ! 
bgpdma_kaddr ) { printk( KERN_INFO "(E) DMA is not mapped\n"); return -ENODEV; } + + + +/* dma interrupt handler */ +/* static unsigned int dmaHandlerCount ; */ +irqreturn_t dmaIrqHandler(int irq, void * arg) +{ + struct dma_irq * irqInfo = ( struct dma_irq * )arg; + + +/* dmaHandlerCount += 1 ; */ +/* if( irq != 92 || dmaHandlerCount < 20 ) */ +/* { */ +/* printk( KERN_INFO "(I) bgpdma: rec fifo irq dmaIrqHandler called irq:%d arg:%08x\n", */ +/* irq, (int)arg); */ +/* // show_bic_regs() ; */ +/* } */ + (*irqInfo->func)(irqInfo->arg1,0,0,0); + return IRQ_HANDLED; +} + +/* irqreturn_t watermarkIrqHandler(int irq, void * arg) */ +/* { */ +/* struct dma_irq * irqInfo = ( struct dma_irq * )arg; */ +/* */ +/* */ +/* dmaHandlerCount += 1 ; */ +/* if( irq != 92 || dmaHandlerCount < 20 ) */ +/* { */ +/* printk( KERN_INFO "(I) bgpdma: rec fifo irq watermarkIrqHandler called irq:%d arg:%08x\n", */ +/* irq, (int)arg); */ +/* // show_bic_regs() ; */ +/* } */ +/* (*irqInfo->func)(irqInfo->arg1,0,0,0); */ +/* return IRQ_HANDLED; */ +/* } */ + +irqreturn_t dummyIrqHandler(int irq, void * arg) +{ + printk( KERN_INFO "(I) bgpdma: dummy irq handler called irq:%d arg:%08x\n", + irq, (int)arg); + return IRQ_HANDLED; +} + + +static int /*__init*/ bgpdma_module_init (void) +{ +/* int ret = -1; */ +/* dev_t devno; */ + + TRACE(( + KERN_INFO "bgpdma: module initialization\n" + )); + + bgpdma_dev.pa_addr = ((unsigned long long)_BGP_UA_DMA << 32) | _BGP_PA_DMA; + + /* map DMA into kernel space */ + + if ( bgcnsd.services->isIONode() ) + { + TRACE(( + KERN_INFO "(I) DMA is not mapped on IO node\n" + )); + bgpdma_kaddr = NULL; + return 0; + } + + bgpdma_kaddr = ioremap( bgpdma_dev.pa_addr, BGP_DMA_MMAP_SIZE ); + + if ( bgpdma_kaddr == NULL ) + { + printk( KERN_INFO "(E) bgpdma: vmap() failed\n" ); + return -ENOMEM; + } + + /* Let bgcnsd know about the new address of the dma */ + unsigned long flags; + local_irq_save(flags); + bgcnsd.services->mapDevice(BGCNS_DMA, bgpdma_kaddr ); + 
local_irq_restore(flags); + + + TRACE(( + KERN_INFO "bgpdma: module initialization finished, dma kaddr:%08x\n", + (unsigned)bgpdma_kaddr)); + + return 0; +} + +/* ************************************************************************* */ +/* BG/P net module cleanup */ +/* ************************************************************************* */ + +static void __exit + bgpdma_module_cleanup() +{ + + /* release kernel mapping of dma */ + iounmap ( bgpdma_kaddr ); +} + + + +/* + * Query free counter subgroups + */ +u32 Kernel_CounterGroupQueryFree( u32 type, + u32 grp, + u32 * num_subgrps, + u32 * subgrps ) +{ + CHECK_DMA_ACCESS; + + int ret = 0; + uint32_t counters; + int i; + + if ( grp < 0 || grp >= 4 || type < 0 || type > 1 ) return -EINVAL; + if ( num_subgrps == NULL || subgrps == NULL ) return -EINVAL; + + if ( type == 0 ) + counters = bgpdma_dev.state.inj_counters[grp]; + else + counters = bgpdma_dev.state.rec_counters[grp]; + + (*num_subgrps) = 0; + for(i=0; i < DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP; i++ ) + { + if ( ( counters & _BN(i) ) == 0) + { + subgrps[*num_subgrps] = i; + (*num_subgrps)++; + } + } + + TRACE(( + KERN_INFO "Allocated counters:%08x num_free:%d\n",counters,(int)num_subgrps)); + + return ret; +} +EXPORT_SYMBOL(Kernel_CounterGroupQueryFree); + + +/* + * Allocate counter subgroups + */ +u32 Kernel_CounterGroupAllocate( u32 type, + u32 grp, + u32 num_subgrps, + u32 * subgrps, + u32 target, /* not used */ + u32 handler, /* not used */ + u32 * handler_parm, /* not used */ + u32 interruptGroup, /* not used */ + u32 * cg ) +{ + CHECK_DMA_ACCESS; + + unsigned i,j; + u32 *counters; + u32 c_bits; + int min_id, max_id, word_id, bit_id, global_subgrp; + DMA_CounterGroup_t * cg_ptr = (DMA_CounterGroup_t *)cg; + if ( type > 1 ) return -EINVAL; + if ( grp >= 4 ) return -EINVAL; + if ( subgrps == NULL ) return -EINVAL; + if ( num_subgrps <= 0 ) return -EINVAL; + if ( num_subgrps > DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) return -EINVAL; + if ( cg_ptr == 
NULL ) return -EINVAL; + + if ( type == DMA_Type_Injection ) + counters = &bgpdma_dev.state.inj_counters[grp]; + else + counters = &bgpdma_dev.state.rec_counters[grp]; + + c_bits = 0; + for(i=0;i< num_subgrps;i++) + { + if ( subgrps[i] < 0 ) return -EINVAL; + if (subgrps[i] >= DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) return -EINVAL; + if ( *counters & _BN(subgrps[i]) ) + { + printk( KERN_WARNING + "bgpdma: tried to allocate busy counters grp:%d subgrps:%d\n", + grp, subgrps[i]); + return -EBUSY; + } + c_bits |= _BN(subgrps[i]); + } + + memset( cg_ptr, 0, sizeof(DMA_CounterGroup_t)); + cg_ptr->type = type; + cg_ptr->group_id = grp; + + if ( type == DMA_Type_Injection ) + cg_ptr->status_ptr = (DMA_CounterStatus_t *) _BGP_VA_iDMA_COUNTER_ENABLED(grp,0); + else + cg_ptr->status_ptr = (DMA_CounterStatus_t *) _BGP_VA_rDMA_COUNTER_ENABLED(grp,0); + + for(i=0;i< num_subgrps;i++) + { + min_id = subgrps[i] * DMA_NUM_COUNTERS_PER_SUBGROUP; + max_id = min_id + DMA_NUM_COUNTERS_PER_SUBGROUP; + global_subgrp = (grp * DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) + subgrps[i]; + + cg_ptr->grp_permissions |= _BN( global_subgrp ); + for ( j = min_id; j < max_id; j++ ) + { + word_id = DMA_COUNTER_GROUP_WORD_ID(j); + bit_id = DMA_COUNTER_GROUP_WORD_BIT_ID(j); + cg_ptr->permissions[ word_id ] |= _BN(bit_id); + + if ( type == DMA_Type_Injection ) + { + cg_ptr->counter[j].counter_hw_ptr = + ( DMA_CounterHw_t *) _BGP_VA_iDMA_COUNTER(grp,j); + DMA_CounterSetValueBaseHw(cg_ptr->counter[j].counter_hw_ptr, 0, 0); + /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->counter); */ + /* CHECK_RET(ret); */ + /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_base); */ + /* CHECK_RET(ret); */ + + TRACE(( + KERN_INFO "DMA Injection cntr allocated: %d(%08x)\n", + j,(unsigned)cg_ptr->counter[j].counter_hw_ptr)); + } + else + { + cg_ptr->counter[j].counter_hw_ptr = + ( DMA_CounterHw_t *) _BGP_VA_rDMA_COUNTER(grp,j); + DMA_CounterSetValueBaseMaxHw(cg_ptr->counter[j].counter_hw_ptr, 0, 0, 0); + /* 
ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->counter); */ + /* CHECK_RET(ret); */ + /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_base); */ + /* CHECK_RET(ret); */ + /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_max); */ + /* CHECK_RET(ret); */ + + TRACE(( + KERN_INFO "DMA Reception cntr allocated: %d(%08x)\n", + j,(unsigned)cg_ptr->counter[j].counter_hw_ptr)); + } + /* disable the counter, clear it's hit-zero */ + /* DMA_CounterSetDisableById ( cg_ptr,j ); */ + cg_ptr->status_ptr->disable[word_id] = _BN(bit_id); + /* ret = put_user( _BN(bit_id), &cg_ptr->status_ptr->disable[word_id] ); */ + /* CHECK_RET(ret); */ + /* DMA_CounterClearHitZeroById( &cg,j ); */ + cg_ptr->status_ptr->clear_hit_zero[word_id] = _BN(bit_id); + /* ret = put_user( _BN(bit_id), &cg_ptr->status_ptr->clear_hit_zero[word_id] ); */ + /* CHECK_RET(ret); */ + } + } + + _bgp_msync(); + + /* mark counters allocated in the global state */ + *counters |= c_bits; + + TRACE(( + KERN_INFO "Allocated counters:%08x\n",*counters)); + + return 0; +} +EXPORT_SYMBOL(Kernel_CounterGroupAllocate); + + +/* + * Query free inj fifos + */ +u32 Kernel_InjFifoGroupQueryFree( u32 grp, u32 * num_fifos, u32 * fifo_ids ) +{ + CHECK_DMA_ACCESS; + + int ret = 0; + u32 state; + int i; + + if ( grp >= DMA_NUM_INJ_FIFO_GROUPS ) return -EINVAL; + if ( num_fifos == NULL || fifo_ids == NULL ) return -EINVAL; + + state = bgpdma_dev.state.inj_fifos[grp]; + + (*num_fifos) = 0; + for(i=0;i< DMA_NUM_INJ_FIFOS_PER_GROUP;i++) + { + if ( ( state & _BN(i) ) == 0 ) + { + fifo_ids[(*num_fifos)] = i; + (*num_fifos)++; + TRACE(( + KERN_INFO "Free inj fifo: %d\n",i)); + } + } + + return ret; +} +EXPORT_SYMBOL(Kernel_InjFifoGroupQueryFree); + + +/* + * Allocate inj fifos from a group + */ +u32 Kernel_InjFifoGroupAllocate( u32 grp, + u32 num_fifos, + u32 * ids, + u16 * pri, + u16 * loc, + u8 * map, + u32 * fg ) +{ + CHECK_DMA_ACCESS; + + /* MUST be called when the DMA is inactive, prior to any DMA activity 
*/ + int i; + u32 f_bits =0; + u32 p_bits =0; + u32 l_bits =0; + DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg; + + if ( fg_ptr == NULL ) return -EINVAL; + if ( grp < 0 || grp >= DMA_NUM_FIFO_GROUPS ) return -EINVAL; + if ( num_fifos <= 0 || num_fifos > DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL; + if ( ids == NULL || pri == NULL || map == NULL ) return -EINVAL; + + f_bits = 0; /* holds a bit vector of all fifos used in this allocation */ + for ( i = 0; i < num_fifos; i++ ) + { + if ( ids[i] >= DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL; + if ( pri[i] > 1 || loc[i] > 1 ) return -EINVAL; + if ( loc[i] == 0 && map[i] == 0 ) return -EINVAL; + if ( loc[i] == 1 && map[i] != 0 ) return -EINVAL; + + if ( bgpdma_dev.state.inj_fifos[grp] & _BN(ids[i]) ) + { + printk( KERN_WARNING + "bgpdma: tried to allocate busy inj fifos grp:%d fifo_id:%d\n", + grp, ids[i]); + return -EBUSY; + } + + f_bits |= _BN(ids[i]); + if ( loc[i] == 1 ) l_bits |= _BN(i); + if ( pri[i] == 1 ) p_bits |= _BN(i); + } + + + memset( fg_ptr, 0, sizeof(DMA_InjFifoGroup_t)); + fg_ptr->status_ptr = (DMA_InjFifoStatus_t *) _BGP_VA_iDMA_NOT_EMPTY(grp); + fg_ptr->group_id = grp; + fg_ptr->permissions |= f_bits; + + /* Disable interrupts and the injection FIFOs */ + unsigned long flags; + local_irq_save(flags); + bgcnsd.services-> + setDmaFifoControls( BGCNS_Disable,BGCNS_InjectionFifoInterrupt, grp,f_bits,NULL ); + bgcnsd.services-> + setDmaFifoControls( BGCNS_Disable,BGCNS_InjectionFifo, grp,f_bits,NULL ); + local_irq_restore(flags); + + /* deactivate all these fifos */ + fg_ptr->status_ptr->deactivate = f_bits; + /* ret = put_user( f_bits, &fg.status_ptr->deactivate ); */ + /* CHECK_RET(ret); */ + + _bgp_mbar(); /* make sure write is in the DMA */ + + local_irq_save(flags); + bgcnsd.services->setDmaInjectionMap( grp, (unsigned*)ids, map, num_fifos ); + local_irq_restore(flags); + + for ( i=0;i< num_fifos; i++) + { + fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr = + ( DMA_FifoHW_t *) 
_BGP_VA_iDMA_START(grp, ids[i]); + fg_ptr->fifos[ids[i]].fifo_id = ids[i]; + fg_ptr->fifos[ids[i]].desc_count = 0; + fg_ptr->fifos[ids[i]].occupiedSize = 0; + fg_ptr->fifos[ids[i]].priority = pri[i] ; + fg_ptr->fifos[ids[i]].local = loc[i]; + fg_ptr->fifos[ids[i]].ts_inj_map = map[i]; + + /* write 0's to the hw fifo */ + fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_start = 0; + /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_start ); */ + /* CHECK_RET(ret); */ + fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_head = 0; + /* ret = put_user ( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_head ); */ + /* CHECK_RET(ret); */ + fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_tail = 0; + /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_tail ); */ + /* CHECK_RET(ret); */ + fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_end = 0; + /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_end ); */ + /* CHECK_RET(ret); */ + +/* TRACE((KERN_INFO "Allocate inj fifo: %d",ids[i])); */ + } + + /* clear the threshold crossed */ + _bgp_mbar(); /* no previous write will pass this one */ + fg_ptr->status_ptr->clear_threshold_crossed = f_bits; + /* ret = put_user( f_bits, &fg.status_ptr->clear_threshold_crossed ); */ + /* CHECK_RET(ret); */ + + local_irq_save(flags); + /* set the local copy bits */ + bgcnsd.services->setDmaLocalCopies(BGCNS_Enable, grp, l_bits); + /* set the priority bits */ + bgcnsd.services->setDmaPriority(BGCNS_Enable, grp, p_bits); + + /* Enable interrupts for these fifos. */ + /* NOTE: enablement of the injection FIFO will take place during FIFO init. 
*/ + /* _bgp_cns()->setDmaFifoControls( BGCNS_Enable, BGCNS_InjectionFifoInterrupt, grp, f_ids, NULL ); */ + local_irq_restore(flags); + + /* mark fifos allocated in the global state */ + bgpdma_dev.state.inj_fifos[grp] |= f_bits; + + return 0; +} +EXPORT_SYMBOL(Kernel_InjFifoGroupAllocate); + +/* + * General fifo init + */ +static inline int FifoInit( DMA_Fifo_t * f_ptr, + void * va_start, + void * va_head, + void * va_end ) +{ + int ret = 0; + uint32_t pa_start, pa_head, pa_end; + unsigned bytes; + + TRACE(( + KERN_INFO "FifoInit va_start:%08x va_head:%08x va_end:%08x\n", + (u32)va_start,(u32)va_head,(u32)va_end)); + + if ( f_ptr == NULL ) return -EINVAL; + if ( f_ptr->fifo_hw_ptr == NULL ) return -EINVAL; + if ( ((uint32_t)va_start & 0x1F) != 0 ) return -EINVAL; + if ( ((uint32_t)va_end & 0x1F) != 0 ) return -EINVAL; + if ( ((uint32_t)va_head & 0xF ) != 0 ) return -EINVAL; + + bytes = (uint32_t)va_end - (uint32_t)va_start; + + /* translate start address ( and check if the region is contigouos) */ + pa_start = virt_to_phys ( va_start ); +/* TRACE((KERN_INFO "bgpdma: FifoInit() va_start:%08x pa_start:%08x shifted:%08x", */ +/* (u32)va_start, pa_start, pa_start>>4 )); */ + pa_start >>= 4; /* we need 16-byte aligned address */ + + /* ret = VaTo4bitShiftedPa( va_start, bytes, &pa_start ); */ + /* CHECK_RET(ret); */ + + /* physical region is contigouos, we can compute pa_end and pa_head */ + pa_end = pa_start + ( bytes >> 4 ); + pa_head = pa_start + ( ((uint32_t)va_head - (uint32_t)va_start ) >> 4 ); + + /* Write the start, end , head and tail(= head) */ + f_ptr->fifo_hw_ptr->pa_start = pa_start; + /* ret = put_user ( pa_start, &f_ptr->fifo_hw_ptr->pa_start ); */ + /* CHECK_RET(ret); */ + f_ptr->fifo_hw_ptr->pa_head = pa_head; + /* ret = put_user( pa_head, &f_ptr->fifo_hw_ptr->pa_head ); */ + /* CHECK_RET(ret); */ + f_ptr->fifo_hw_ptr->pa_tail = pa_head; + /* ret = put_user( pa_head, &f_ptr->fifo_hw_ptr->pa_tail ); */ + /* CHECK_RET(ret); */ + 
f_ptr->fifo_hw_ptr->pa_end = pa_end; + /* ret = put_user( pa_end, &f_ptr->fifo_hw_ptr->pa_end ); */ + /* CHECK_RET(ret); */ + + _bgp_mbar(); + + /* Save the shadows in the structure */ + f_ptr->pa_start = pa_start; + f_ptr->va_start = va_start; + f_ptr->va_end = va_end; + f_ptr->va_head = va_head; + f_ptr->va_tail = va_head; + + /* Compute the free space */ + f_ptr->fifo_size = bytes >> 4; /* Number of 16B quads */ + f_ptr->free_space = f_ptr->fifo_size; + + return ret; +} + + +/* + * Initialize an injection fifo + */ +u32 Kernel_InjFifoInitById( u32 * fg, + int fifo_id, + u32 * va_start, + u32 * va_head, + u32 * va_end ) +{ + CHECK_DMA_ACCESS; + + int ret = 0; + int grp; + uint32_t x_phead, x_vstart, x_pstart, x_vtail; + DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg; + + if ( fg_ptr == NULL ) return -EINVAL; + if ( fifo_id < 0 || fifo_id >= DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL; + if ( va_start >= va_end || va_start > va_head || va_head > va_end ) return -EINVAL; + if ( (u32)va_head+DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES > (u32)va_end ) return -EINVAL; + if ( (u32)va_end - (u32)va_start < DMA_MIN_INJ_FIFO_SIZE_IN_BYTES ) return -EINVAL; + if ( ((u32)va_start & 0x1F) != 0 ) return -EINVAL; + if ( ((u32)va_end & 0x1F) != 0 ) return -EINVAL; + if ( ((u32)va_head & 0xF) != 0 ) return -EINVAL; + + if (( fg_ptr->permissions & _BN(fifo_id)) == 0 ) return -EBUSY; + + grp = fg_ptr->group_id; + + + /* Disable the injection FIFO and its interrupt: */ + unsigned long flags; + local_irq_save(flags); + bgcnsd.services-> + setDmaFifoControls(BGCNS_Disable, BGCNS_InjectionFifo, grp, _BN(fifo_id), NULL); + bgcnsd.services-> + setDmaFifoControls(BGCNS_Disable, BGCNS_InjectionFifoInterrupt, grp, _BN(fifo_id), NULL ); + local_irq_restore(flags); + + + /* Deactivate the fifo */ + fg_ptr->status_ptr->deactivate = _BN(fifo_id); + /* ret = put_user ( _BN(fifo_id), &fg.status_ptr->deactivate ); */ + /* CHECK_RET(ret); */ + + /* Initialize the fifo */ + ret = FifoInit( 
&fg_ptr->fifos[fifo_id].dma_fifo, va_start, va_head, va_end ); + CHECK_RET(ret); + + /* Initialize the descriptor count and occupied size */ + fg_ptr->fifos[fifo_id].desc_count = 0; + fg_ptr->fifos[fifo_id].occupiedSize = 0; + + /* clear the threshold crossed */ + fg_ptr->status_ptr->clear_threshold_crossed = _BN(fifo_id); + /* ret = put_user( _BN(fifo_id), &fg.status_ptr->clear_threshold_crossed ); */ + /* CHECK_RET(ret); */ + + /* read back something from the dma to ensure all writes have occurred */ + /* head should equal tail */ + x_phead = fg_ptr->fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head; + /* ret = get_user( x_phead, &fg.fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head ); */ + /* CHECK_RET(ret); */ + x_vstart = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.va_start); + x_pstart = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.pa_start); + x_vtail = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.va_tail); + if ( x_vstart + ( (x_phead - x_pstart) << 4 ) != x_vtail ) return -EIO; + + + + /* Enable the FIFO and its interrupt: */ + local_irq_save(flags); + bgcnsd.services-> + setDmaFifoControls(BGCNS_Enable, BGCNS_InjectionFifo, grp, _BN(fifo_id), NULL); + /* bgcnsd.services->setDmaFifoControls(BGCNS_Enable, BGCNS_InjectionFifoInterrupt, grp, _BN(fifo_id), NULL); */ + local_irq_restore(flags); + + /* Activate the fifo */ + fg_ptr->status_ptr->activate = _BN(fifo_id); + /* ret = put_user( _BN(fifo_id), &fg.status_ptr->activate ); */ + /* CHECK_RET(ret); */ + + return 0; +} +EXPORT_SYMBOL(Kernel_InjFifoInitById); + + +/* + * Free inj fifos + */ +uint32_t Kernel_InjFifoGroupFree(uint32_t grp, + uint32_t num_fifos, + uint32_t * fifo_ids, + uint32_t * fg) +{ + int ret = 0; + u32 f_bits =0; + int i; + DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg; + + if ( fg_ptr == NULL ) return -EINVAL; + if ( grp < 0 || grp >= DMA_NUM_FIFO_GROUPS ) return -EINVAL; + if ( num_fifos <= 0 || num_fifos > DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL; + if ( fifo_ids == NULL ) return 
-EINVAL; + + f_bits = 0; /* holds a bit vector of all fifos used in this allocation */ + for ( i = 0; i < num_fifos; i++ ) + { + if ( fifo_ids[i] >= DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL; + + if ( ! (bgpdma_dev.state.inj_fifos[grp] & _BN(fifo_ids[i])) ) + { + printk( KERN_WARNING + "bgpdma: tried to free a non-allocated inj fifo grp:%d fifo_id:%d\n", + grp, fifo_ids[i]); + return -EBUSY; + } + + f_bits |= _BN(fifo_ids[i]); + } + + for ( i=0;i< num_fifos; i++) + fg_ptr->fifos[fifo_ids[i]].dma_fifo.fifo_hw_ptr = NULL; + + fg_ptr->permissions ^= f_bits; + fg_ptr->status_ptr->deactivate = f_bits; + + return ret; +} + + + +/* + * Set the reception fifos map + */ +int Kernel_RecFifoSetMap( u32 * map ) +{ + CHECK_DMA_ACCESS; + + int i, g; + DMA_RecFifoMap_t * map_ptr = (DMA_RecFifoMap_t *)map; + + /* NEED TO PUT A LOCK AROUND THIS, Assume either the syscall mechanism does this */ + /* or it has to be put here */ + + /* MUST BE CALLED ONCE, Prior to Any DMA activity */ + /* Specifically, must be called after _bgp_DMA_Reset_Release */ + /* and prior to any _BGP_rDMA_Fifo_Get_Fifo_Group calls */ + + if ( map_ptr == NULL ) return -EINVAL; + if ( map_ptr->save_headers > 1 ) return -EINVAL; + + for (i=0; i< DMA_NUM_NORMAL_REC_FIFOS; i++) + if ( ( map_ptr->fifo_types[i] < 0 ) || ( map_ptr->fifo_types[i] > 1)) return -EINVAL; + + /* rec fifo map can be set only once */ + if ( bgpdma_dev.state.rec_fifo_set_map != 0 ) return -EBUSY; + + if ( map_ptr->save_headers == 1) + for (i=0; i< DMA_NUM_HEADER_REC_FIFOS; i++) + if ( ( map_ptr->hdr_fifo_types[i] <0 ) || ( map_ptr->hdr_fifo_types[i] > 1 )) + return -EINVAL; + + for (g=0; g< DMA_NUM_REC_FIFO_GROUPS;g++) + for (i=0; i< DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP; i++) + if ( map_ptr->ts_rec_map[g][i] >= DMA_NUM_NORMAL_REC_FIFOS) + return -EINVAL; + + TRACE(( + KERN_INFO "bgpdma: Kernel_RecFifoSetMap() disabling reception FIFO interrupts\n")); + + unsigned long flags; + local_irq_save(flags); + /* Disable the reception FIFOs */ + 
bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionFifo, 0 /* group not used */, 0xFFFFFFFF, NULL ); + bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifo, BGCNS_DMA_ALL_GROUPS, 0 /* mask not used */, NULL ); + + /* Set the map: */ + bgcnsd.services->setDmaReceptionMap(map_ptr->ts_rec_map, + map_ptr->fifo_types, + map_ptr->save_headers ? map_ptr->hdr_fifo_types : NULL, + map_ptr->threshold ); + + local_irq_restore(flags); + + /* Don't enable the fifos here, the fifo init will do that */ + bgpdma_dev.state.rec_fifo_set_map = 1; + + return 0; +} +EXPORT_SYMBOL(Kernel_RecFifoSetMap); + + +/* + * Get the reception fifos map + */ +int Kernel_RecFifoGetMap( u32 * map ) +{ + CHECK_DMA_ACCESS; + + int ret; + DMA_RecFifoMap_t * map_ptr = (DMA_RecFifoMap_t *)map; + + if ( map_ptr == NULL ) return -EINVAL; + + memset( map_ptr, 0, sizeof(DMA_RecFifoMap_t) ); + + unsigned long flags; + local_irq_save(flags); + + ret = bgcnsd.services->getDmaReceptionMap( map_ptr->ts_rec_map, + map_ptr->fifo_types, + &(map_ptr->save_headers), + map_ptr->hdr_fifo_types, + map_ptr->threshold); + + local_irq_restore(flags); + + CHECK_RET(ret); + + return 0; +} +EXPORT_SYMBOL(Kernel_RecFifoGetMap); + +/* + * Initialize a receiver fifo group + */ +int Kernel_RecFifoGetFifoGroup( u32 * fg, + int grp, /* group number */ + int target, /* not used */ + void * normal_handler, /* not used */ + void * normal_handler_parm,/* not used */ + void * header_handler, /* not used */ + void * header_handler_parm,/* not used */ + void * interruptGroup ) /* not used */ +{ + CHECK_DMA_ACCESS; + + int ret; + DMA_RecFifoMap_t map; + + uint32_t used_fifos; + int g,i,j,min_id,max_id,idx; + uint32_t x; + DMA_RecFifoGroup_t * fg_ptr = (DMA_RecFifoGroup_t *)fg; + + if ( fg_ptr == NULL ) return -EINVAL; + if ( grp < 0 || grp > DMA_NUM_REC_FIFO_GROUPS ) return -EINVAL; + /* if ( target < 0 || target > 4 ) return -EINVAL; */ + + memset( fg_ptr, 0, sizeof(DMA_RecFifoGroup_t) ); + + + /* get 
the map */ + unsigned long flags; + local_irq_save(flags); + ret = bgcnsd.services->getDmaReceptionMap( map.ts_rec_map, + map.fifo_types, + &(map.save_headers), + map.hdr_fifo_types, + map.threshold); + local_irq_restore(flags); + + CHECK_RET(ret); + + /* set the mask */ + fg_ptr->group_id = grp; + switch(grp) + { + case 0: fg_ptr->mask = 0xFF000000; break; + case 1: fg_ptr->mask = 0x00FF0000; break; + case 2: fg_ptr->mask = 0x0000FF00; break; + case 3: fg_ptr->mask = 0x000000FF; break; + } + + /* set the status pointer */ + fg_ptr->status_ptr = ( DMA_RecFifoStatus_t *) _BGP_VA_rDMA_NOT_EMPTY(grp,0); + + /* figure out which normal fifos are being used */ + min_id = (grp*DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP); + max_id = min_id +DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1; + + used_fifos = 0; + for (g=0;g< DMA_NUM_REC_FIFO_GROUPS;g++) + for(i=0;i<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;i++) + if ( ( map.ts_rec_map[g][i] >= min_id ) && (map.ts_rec_map[g][i] <= max_id) ) + used_fifos |= _BN(map.ts_rec_map[g][i]); + + idx = 0; + for(j= 0;j<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;j++) + { + i = min_id + j; + if ( ( _BN(i) & used_fifos) != 0 ) + { + fg_ptr->fifos[idx].type = map.fifo_types[i]; + fg_ptr->fifos[idx].global_fifo_id = i; + fg_ptr->fifos[idx].num_packets_processed_since_moving_fifo_head = 0; + fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr = ( DMA_FifoHW_t *) _BGP_VA_rDMA_START(grp,j); + /* Make sure this fifo is disabled */ + fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_start = 0; + /* ret = put_user( 0, &fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_start ); */ + /* CHECK_RET(ret); */ + fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_head = 0; + /* ret = put_user( 0, &fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_head ); */ + /* CHECK_RET(ret); */ + fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_tail = 0; + /* ret = put_user( 0, &fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_tail ); */ + /* CHECK_RET(ret); */ + fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_end = 0; + /* ret = put_user( 0, 
&fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_end ); */ + /* CHECK_RET(ret); */ + + idx++; + } + } /* j loop */ + + /* are we saving headers? */ + if ( map.save_headers == 1 ) + { + fg_ptr->num_hdr_fifos = 1; + fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].type = map.hdr_fifo_types[grp]; + fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].global_fifo_id = DMA_NUM_NORMAL_REC_FIFOS+grp; + fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].num_packets_processed_since_moving_fifo_head = 0; + fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr = + ( DMA_FifoHW_t *) _BGP_VA_rDMA_START(grp, DMA_HEADER_REC_FIFO_ID); + + fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_start = 0; + /* ret = */ + /* put_user( 0, &fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_start ); */ + /* CHECK_RET(ret); */ + + fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_head = 0; + /* ret = */ + /* put_user( 0, &fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_head ); */ + /* CHECK_RET(ret); */ + fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_tail = 0; + /* ret = */ + /* put_user( 0, &fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_tail ); */ + /* CHECK_RET(ret); */ + fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_end = 0; + /* ret = */ + /* put_user( 0, &fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_end ); */ + /* CHECK_RET(ret); */ + } + + fg_ptr->num_normal_fifos = idx; + fg_ptr->status_ptr->clear_threshold_crossed[0] = fg_ptr->mask; + /* ret = put_user( fg_ptr->mask, &fg_ptr->status_ptr->clear_threshold_crossed[0] ); */ + /* CHECK_RET(ret); */ + fg_ptr->status_ptr->clear_threshold_crossed[1] = fg_ptr->mask; + /* ret = put_user( fg_ptr->mask, &fg_ptr->status_ptr->clear_threshold_crossed[1] ); */ + /* CHECK_RET(ret); */ + + /* read back from the dma to ensure all writes have occurred */ + _bgp_mbar(); + x = fg_ptr->status_ptr->threshold_crossed[0]; + /* ret = get_user( x, 
&fg_ptr->status_ptr->threshold_crossed[0] ); */ + /* if ( ret ) return ret; */ + if ( (x & fg_ptr->mask) != 0 ) return -EIO; + + /* reenable interrupts, if necessary */ + /* */ + /* DCRs 0xD71, 0xD72, 0xD73, and 0xD74 contain bits indicating which */ + /* reception fifos will be enabled for interrupt 0, 1, 2, and 3, respectively. */ + /* These interrupts correspond to BIC interrupt group 2, IRQs 28, 29, 30, and */ + /* 31, respectively. Thus, if bit i is on in DCR 0xD7z, and rec fifo i's */ + /* free space drops below the threshold for that fifo, then IRQ 28 + (z-1) */ + /* will fire. */ + /* */ + /* For each reception fifo in this group, turn on bit i in DCR 0xD7z, where */ + /* z-1 is the group number. */ + /* */ + + used_fifos = 0; + for (i = 0; i < fg_ptr->num_normal_fifos; i++) + used_fifos |= _BN(fg_ptr->fifos[i].global_fifo_id); + + TRACE(( + KERN_INFO "bgpdma: Kernel_RecFifoGetFifoGroup() enabling reception FIFO interrupts\n")); + local_irq_save(flags); + + bgcnsd.services->setDmaFifoControls(BGCNS_Enable, + BGCNS_ReceptionFifoInterrupt, + fg_ptr->group_id, + used_fifos, + NULL); + + local_irq_restore(flags); + + + _bgp_msync(); + _bgp_isync(); + + + + return 0; +} +EXPORT_SYMBOL(Kernel_RecFifoGetFifoGroup); + +/* + * Initialize a reception fifo + */ +int Kernel_RecFifoInitById( u32 * fg, + int fifo_id, + void * va_start, + void * va_head, + void * va_end ) +{ + CHECK_DMA_ACCESS; + + int ret; + uint32_t st_word, st_mask; + uint32_t x_phead, x_vtail, x_vstart, x_pstart; + int i, grp, g_fifo_id; + DMA_RecFifoGroup_t * fg_ptr = (DMA_RecFifoGroup_t *)fg; + uint32_t xint[4] = {0,0,0,0}; + + if ( fg_ptr == NULL ) return -EINVAL; + if ( fifo_id < 0 || fifo_id >= DMA_NUM_REC_FIFOS_PER_GROUP ) return -EINVAL; + if ( va_start >= va_end || va_start > va_head || va_head > va_end ) return -EINVAL; + if ( ((u32)va_start & 0x1F) != 0 ) return -EINVAL; + if ( ((u32)va_end & 0x1F) != 0 ) return -EINVAL; + if ( ((u32)va_head & 0xF) != 0 ) return -EINVAL; + /* if ( 
(u32)va_end - (u32)va_start < DMA_MIN_REC_FIFO_SIZE_IN_BYTES ) return -EINVAL; */ + + /* + * Note: The reception fifos are in a disabled state upon return from + * DMA_RecFifoSetMap(), so we assume they are disabled at this point, + * making it safe to set the start, head, etc. + */ + + /* NOTE: This assumes the interrupt enables have been previously set as desired, */ + /* in _bgp_DMA_RecFifoGetFifoGroup, so we simply read those dcrs, disable all fifos, */ + /* and write them back at the end */ + + grp = fg_ptr->group_id; + g_fifo_id = fg_ptr->fifos[fifo_id].global_fifo_id; + + if ( g_fifo_id < DMA_NUM_NORMAL_REC_FIFOS) /* normal fifo */ + { + st_word = 0; /* status word for this fifo */ + st_mask = _BN(g_fifo_id) & fg_ptr->mask; /* status mask for this fifo */ + + /* see if this fifo has already been initialized */ + if ((bgpdma_dev.state.rec_fifo_init[st_word] & _BN(g_fifo_id)) !=0 ) return -EBUSY; + /* Disable the FIFO and all interrupts (interrupts will be restored below) */ + TRACE(( + KERN_INFO "bgpdma: Kernel_RecFifoInitById() disabling reception FIFO interrupts\n")); + unsigned long flags; + local_irq_save(flags); + bgcnsd.services->setDmaFifoControls( BGCNS_Disable, BGCNS_ReceptionFifo, 0 /* group not used */, _BN(g_fifo_id), NULL ); + + for (i=0; i<4; i++) + bgcnsd.services->setDmaFifoControls( BGCNS_Disable, BGCNS_ReceptionFifoInterrupt, i, 0xFFFFFFFF, &(xint[i]) ); /* save for re-enablement below */ + local_irq_restore(flags); + } + else /* header fifo */ + { + st_word = 1; /* status word for this fifo */ + st_mask = fg_ptr->mask; /* status mask for this fifo (only one bit is used by the HW) */ + + /* see if this fifo has already been initialized */ + if ( (bgpdma_dev.state.rec_fifo_init[st_word] & _BN(g_fifo_id-32)) != 0 ) + return -EBUSY; + + /* remember that this fifo has been initialized */ + bgpdma_dev.state.rec_fifo_init[st_word] |= _BN(g_fifo_id-32); + + /* Disable the reception header FIFO and its interrupts */ + TRACE(( + KERN_INFO "bgpdma: 
Kernel_RecFifoInitById() disabling reception header FIFO interrupts\n")); + unsigned long flags; + local_irq_save(flags); + bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifo, grp, 0 /* mask not used */, NULL ); + bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifoInterrupt, 0, 0xFFFFFFFF, xint ); + local_irq_restore(flags); + + } + + /* Initialize the fifo */ + ret = FifoInit( &fg_ptr->fifos[fifo_id].dma_fifo, va_start, va_head, va_end ); + CHECK_RET(ret); + + + /* remember that this fifo has been initialized */ + if ( g_fifo_id < DMA_NUM_NORMAL_REC_FIFOS ) /* normal fifo */ + bgpdma_dev.state.rec_fifo_init[0] |= _BN(g_fifo_id); + else /* header fifo */ + bgpdma_dev.state.rec_fifo_init[1] |= _BN(g_fifo_id-32); + + /* clear the threshold crossed */ + fg_ptr->status_ptr->clear_threshold_crossed[st_word] = st_mask; + /* ret = put_user( st_mask, &fg_ptr->status_ptr->clear_threshold_crossed[st_word] ); */ + /* CHECK_RET(ret); */ + + /* read back something from the dma to ensure all writes have occurred */ + /* head should equal tail */ + x_phead = fg_ptr->fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head; + /* ret = get_user( x_phead, &fg_ptr->fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head ); */ + /* CHECK_RET(ret); */ + x_vstart = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.va_start; + x_pstart = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.pa_start; + x_vtail = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.va_tail; + if ( x_vstart + ( (x_phead - x_pstart) << 4 ) != x_vtail ) return -EIO; + + /* Enable the FIFO and re-enable interrupts */ + unsigned long flags; + local_irq_save(flags); + + if ( g_fifo_id < DMA_NUM_NORMAL_REC_FIFOS) { /* Normal fifo */ + TRACE(( + KERN_INFO "bgpdma: Kernel_RecFifoInitById() enabling reception FIFO interrupts\n")); + bgcnsd.services->setDmaFifoControls(BGCNS_Enable, BGCNS_ReceptionFifo, 0 /* group not used */, _BN(g_fifo_id), NULL); + + for (i=0; i<4; i++) + 
bgcnsd.services->setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionFifoInterrupt, i, 0 /* mask not used */, &(xint[i]) ); /* Restore saved state */ + } + else { /* Header FIFO */ + TRACE(( + KERN_INFO "bgpdma: Kernel_RecFifoInitById() enabling reception header FIFO interrupts\n")); + bgcnsd.services->setDmaFifoControls(BGCNS_Enable, BGCNS_ReceptionHeaderFifo, grp, 0 /* mask not used */, NULL ); + /* bgcnsd.services->setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionHeaderFifoInterrupt, 0, 0, xint ); */ + } + + local_irq_restore(flags); + + return 0; +} +EXPORT_SYMBOL(Kernel_RecFifoInitById); + +/* + * Register interrupt handlers + */ +int Kernel_SetCommThreadConfig(int irq, + int opcode, + LockBox_Counter_t cntrid, + Kernel_CommThreadHandler handler, + uint32_t arg1, + uint32_t arg2, + uint32_t arg3, + uint32_t arg4) +{ + int ret = 0; + int i; + + CHECK_PARAM( arg2 == 0 && arg3 == 0 && arg4 == 0 ); + + + + for ( i = 0; i < MAX_NUM_IRQ; i++ ) + if ( bgpdma_dev.irqInfo[i].irq == 0 || bgpdma_dev.irqInfo[i].irq == irq ) + break; + + if ( i == MAX_NUM_IRQ ) + { + printk(KERN_INFO "bgpdma: Kernel_SetCommThreadConfig: No more irq info slot\n" ); + return -ENOSPC; + } + + bgpdma_dev.irqInfo[i].func = handler; + bgpdma_dev.irqInfo[i].arg1 = arg1; + + if ( bgpdma_dev.irqInfo[i].irq == irq ) + { + TRACE(( + KERN_INFO "bgpdma: Kernel_SetCommThreadConfig: Re-registering handler " + "for irq:%d func:%08x arg1:%d\n",irq, (int)handler, arg1 )); + return 0; + } + + bgpdma_dev.irqInfo[i].irq = irq; + +/* bgp_dma_irq = irq ; */ +#if defined(TORNIC_TORUS_AFFINITY) + bic_set_cpu_for_irq(irq,k_TorusAffinityCPU) ; + TRACE(( + KERN_INFO "bgpdma: setting affinity irq=%d affinity=%d\n",irq, k_TorusAffinityCPU )); +#endif + + + ret = request_irq(irq, + dmaIrqHandler, + IRQF_DISABLED, + BGP_DMA_NAME, + &bgpdma_dev.irqInfo[i]); + + TRACE(( + KERN_INFO "bgpdma: request_irq irq=%d i=%d func=%p arg1=%08x ret=%d\n",irq, i, handler, arg1, ret )); + CHECK_RET(ret); + + TRACE(( + KERN_INFO "bgpdma: 
Kernel_SetCommThreadConfig() finished\n")); + return ret; +} + +EXPORT_SYMBOL(Kernel_SetCommThreadConfig) ; + +/* + * Remove commthread from the run queue ... not implemented + */ +int pthread_poof_np( void ) +{ + printk(KERN_INFO "bgpdma: pthread_poof_np() called !!! (bgp_dma.c:%d)\n", + __LINE__); + return 0; +} diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c b/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c new file mode 100644 index 00000000000000..6f96f188276b46 --- /dev/null +++ b/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c @@ -0,0 +1,1588 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2006,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/*! + * \file DMA_Descriptors.c + * + * \brief Implementations for Functions defined in bgp/arch/include/spi/DMA_Descriptors.h + */ +#include <linux/version.h> +#include <linux/module.h> + +#ifndef __LINUX_KERNEL__ + +#include <bpcore/bgp_types.h> + +/*! 
+ * \brief For kernel_interface.h so that rts_get_personality gets defined + */ +#define SPI_DEPRECATED 1 +#include <spi/kernel_interface.h> + +#include <spi/DMA_Descriptors.h> +#include <spi/DMA_Counter.h> +#include <spi/DMA_InjFifo.h> +#include <spi/DMA_RecFifo.h> + +#include <spi/DMA_Assert.h> + +#ifdef __CNK__ +#include <cnk/PersUtils.h> +#endif + +#else + +#include <spi/linux_kernel_spi.h> + +#endif /* ! __LINUX_KERNEL__ */ + + +/*! + * \brief Static Info from Personality + * + * The following structure defines information from the personality. + * They are intended to be static so, once the info is retrieved from + * the personality, it does not need to be retrieved again (it is a + * system call to retrieve personality info). + * + * It is assumed that this is initialized to zero when the program is + * loaded. + * + */ +static DMA_PersonalityInfo_t personality_info; + + +/*! + * \brief Get Personality Information + * + * Gets personality information into the "personality_info" static structure. + * + * \post The personality information is retrieved into the structure + * + */ +void DMA_GetPersonalityInfo(void) +{ + _BGP_Personality_t *pers_ptr; + +#ifndef __CNK__ + + _BGP_Personality_t pers; + + rts_get_personality( &pers, + sizeof(pers) ); + + pers_ptr = &pers; + +#else + + pers_ptr = _bgp_GetPersonality(); + +#endif + + personality_info.nodeXCoordinate = pers_ptr->Network_Config.Xcoord; + personality_info.nodeYCoordinate = pers_ptr->Network_Config.Ycoord; + personality_info.nodeZCoordinate = pers_ptr->Network_Config.Zcoord; + personality_info.xNodes = pers_ptr->Network_Config.Xnodes; + personality_info.yNodes = pers_ptr->Network_Config.Ynodes; + personality_info.zNodes = pers_ptr->Network_Config.Znodes; + + _bgp_msync(); /* Ensure the info has been stored before setting the flag */ + personality_info.personalityRetrieved = 1; + _bgp_msync(); +} + + +/*! 
+ * \brief Create a DMA Descriptor For a Torus Direct Put Message + * + * A torus direct put message is one that is sent to another node and its data + * is directly put into memory by the DMA on the destination node...it does + * not go into a reception fifo. + * + * A torus direct-put DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = DMA_CSUM_SKIP. + * - Sk = DMA_CSUM_BIT. + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 1 (Indicates a direct-put packet). + * - Dynamic = Set based on caller's "vc". + * - VC = Set to caller's "vc". + * - X,Y,Z = Set to caller's "x", "y", "z". + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = Destination message offset (from the reception + * counter's base address). Set to caller's recv_offset. + * - rDMA_Counter = Reception counter ID. This counter is located on the + * destination node and contains the base address of the + * message and the message length. Set based on caller's + * recv_ctr_grp_id and recv_ctr_id. + * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA. + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. 
+ * \param[in] x The destination's x coordinate (8 bits). + * \param[in] y The destination's y coordinate (8 bits). + * \param[in] z The destination's z coordinate (8 bits). + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Both of x+ and x- cannot be set at the same + * time...same with y and z. + * \param[in] vc The virtual channel that the packet must go + * into if it fails to win the bypass + * arbitration in the receiving node. + * - 0 = Virtual channel dynamic 0 + * - 1 = Virtual channel dynamic 1 + * - 2 = Virtual channel deterministic bubble + * - 3 = Virtual channel deterministic priority + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_ctr_grp_id Reception counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] recv_ctr_id Reception counter ID (within the recv counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] recv_offset Offset of the payload from the pa_base + * associated with the specified reception + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, all payload bytes are included in the torus injection + * checksum. In the first byte of the torus hardware packet header, + * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0. + * The defaults can be changed by changing DMA_CSUM_SKIP and + * DMA_CSUM_BIT in this include file. 
+ * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_ctr_grp_id: + * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. +*/ +int DMA_TorusDirectPutDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int x, + unsigned int y, + unsigned int z, + unsigned int hints, + unsigned int vc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_ctr_grp_id, + unsigned int recv_ctr_id, + unsigned int recv_offset, + unsigned int msg_len + ) +{ + int c; + + SPI_assert( desc != NULL ); + SPI_assert( (hints & 0x0000003F) == hints ); + SPI_assert( vc <= 3 ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( recv_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + +#ifndef NDEBUG + + if ( personality_info.personalityRetrieved == 0 ) + { + DMA_GetPersonalityInfo(); + } + + SPI_assert( x < personality_info.xNodes ); + SPI_assert( y < personality_info.yNodes ); + SPI_assert( z < personality_info.zNodes ); + +#endif + + DMA_ZeroOutDescriptor(desc); + + 
desc->idma_counterId = + inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */ + + desc->base_offset = send_offset; + desc->msg_length = msg_len; + + /* Torus Headers */ + + desc->hwHdr.CSum_Skip = DMA_CSUM_SKIP; /* Checksum all but header */ + desc->hwHdr.Sk = DMA_CSUM_BIT; /* Checksum entire packet */ + desc->hwHdr.Hint = hints; /* Hint Bits from caller */ + + DMA_SetDescriptorPids( desc, + recv_ctr_grp_id ); /* Pids based on recv group id */ + + c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first */ + /* packet. */ + SPI_assert( c!=0 ); + desc->hwHdr.Chunks = c - 1; /* Packet header has 0 for 1 chunk, ... , */ + /* 7 for 8 chunks). */ + + desc->hwHdr.Dm = 1; /* 1=DMA Mode, 0=Fifo Mode */ + + DMA_SetVc( desc, + vc ); /* Virtual channel & Dynamic. */ + + desc->hwHdr.X = x; /* Destination coordinates */ + desc->hwHdr.Y = y; + desc->hwHdr.Z = z; + + desc->hwHdr.Put_Offset = recv_offset; + desc->hwHdr.rDMA_Counter = + recv_ctr_id + recv_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); + + /* Note: The desc->hwHrd3.Payload_Bytes field is set by the iDMA */ + +#ifdef DEBUG_MSG + Dump_InjDescriptor(desc); +#endif + + return 0; +} + + +/*! + * \brief Create a DMA Descriptor For a Local Direct Put Message + * + * A local direct put message is one that is targeted within the same node, and + * its data is directly put into memory by the DMA...it does not go into a + * reception fifo. This is essentially a memcpy via DMA. + * + * A local direct-put DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 1 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. 
Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = 0 (not used). + * - Sk = 0 (not used). + * - Hint = 0 (not used). + * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id". + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 1 (Indicates a direct-put packet). + * - Dynamic = 0 (not used). + * - VC = 0 (not used). + * - X,Y,Z = 0 (not used). + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = Destination message offset (from the reception + * counter's base address). Set to caller's recv_offset. + * - rDMA_Counter = Reception counter ID. This counter is located on the + * destination node and contains the base address of the + * message and the message length.. Set based on caller's + * recv_ctr_grp_id and recv_ctr_id. + * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA. + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_ctr_grp_id Reception counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] recv_ctr_id Reception counter ID (within the recv counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] recv_offset Offset of the payload from the pa_base + * associated with the specified reception + * counter. + * \param[in] msg_len Total message length (in bytes). 
+ * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_ctr_grp_id: + * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1) + * The only use for the pid bits is for debug, ie, if headers are + * being saved. + */ +int DMA_LocalDirectPutDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_ctr_grp_id, + unsigned int recv_ctr_id, + unsigned int recv_offset, + unsigned int msg_len + ) +{ + int c; + + SPI_assert( desc != NULL ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( recv_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + + DMA_ZeroOutDescriptor(desc); + + desc->local_memcopy = 1; /* 1 bit */ + + desc->idma_counterId = + inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */ + + desc->base_offset = send_offset; + desc->msg_length = msg_len; + + /* Torus Headers */ + + DMA_SetDescriptorPids( desc, + recv_ctr_grp_id ); + + c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first */ + /* packet. */ + SPI_assert( c!=0 ); + desc->hwHdr.Chunks = c - 1; /* Packet header has 0 for 1 chunk, ... , */ + /* 7 for 8 chunks). 
*/ + + desc->hwHdr.Dm = 1; /* 1=DMA Mode, 0=Fifo Mode */ + + desc->hwHdr.Put_Offset = recv_offset; + desc->hwHdr.rDMA_Counter = + recv_ctr_id + recv_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); + + /* Note: The desc->hwHrd3.Payload_Bytes field is set by the iDMA */ + +#ifdef DEBUG_MSG + Dump_InjDescriptor(desc); +#endif + + return 0; +} + + +/*! + * \brief Create a DMA Descriptor For a Local L3 Prefetch Only Message + * + * A local prefetch is one in which the DMA simply prefetches the send buffer + * into L3. + * + * A local prefetch DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 1 + * - local_memcopy = 1 + * - idma_counterId = Injection counter ID associated with the message being + * prefetched. This counter contains the base address of + * the message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = 0 (not used). + * - Sk = 0 (not used). + * - Hint = 0 (not used). + * - Pid0, Pid1 = 0 (not used). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 1 (Indicates a DMA packet). + * - Dynamic = 0 (not used). + * - VC = 0 (not used). + * - X,Y,Z = 0 (not used). + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = 0 (not used). + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). 
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + */ +int DMA_LocalPrefetchOnlyDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int msg_len + ) +{ + int c; + + SPI_assert( desc != NULL ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + + DMA_ZeroOutDescriptor(desc); + + desc->local_memcopy = 1; /* 1 bit */ + desc->prefetch_only = 1; /* 1 bit */ + + desc->idma_counterId = + inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */ + + desc->base_offset = send_offset; + desc->msg_length = msg_len; + + /* Torus Headers */ + c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first */ + /* packet. */ + SPI_assert( c!=0 ); + desc->hwHdr.Chunks = c - 1; /* Packet header has 0 for 1 chunk, ... , */ + /* 7 for 8 chunks). */ + + desc->hwHdr.Dm = 1; /* 1=DMA Mode, 0=Fifo Mode */ + +#ifdef DEBUG_MSG + Dump_InjDescriptor(desc); +#endif + + return 0; +} + + +/*! 
+ * \brief Create a DMA Descriptor For a Torus Remote-Get Message + * + * A torus remote-get message is one that is sent to another node and its data + * is directly put by the DMA into an injection fifo on the destination + * node...it does not go into a reception fifo. Therefore, the payload of this + * message is one (or more) descriptors for another message that is to be sent + * back to the originating node. + * + * By default, we assume that the payload of this remote get packet is a single + * descriptor. Thus, Chunks = (2)-1 (64 byte packet) and msg_length = 32. + * For remote gets whose payload is greater than 1 descriptor, the caller can + * change the packet Chunks and msg_length after this function builds the + * default descriptor. + * + * It is also assumed that the payload is NOT checksummed, since it is not + * always reproducible. Things like idma_counterId and base_offset may be + * different on another run, making checksumming inconsistent. + * + * A torus remote-get DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = 32. + * + * - 8 byte torus hardware header + * - CSum_Skip = 0 (not used because Sk is 1). + * - Sk = 1 (do not checksum this packet). + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_inj_fifo_id" (see note). + * - Chunks = Set to (2)-1 = 1. + * - Dm = 1 (Indicates a DMA packet). + * - Dynamic = Set based on caller's "vc". + * - VC = Set to caller's "vc". + * - X,Y,Z = Set to caller's "x", "y", "z". + * + * - 8 byte software header (initial values used by iDMA). 
+ * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA. + * - Flags = Pacing = 0. + * Remote-Get = 1. + * - iDMA_Fifo_ID = Injection fifo ID where the payload will be injected. + * Set based on caller's recv_inj_ctr_grp_id and + * recv_inj_ctr_id. + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] x The destination's x coordinate (8 bits). + * \param[in] y The destination's y coordinate (8 bits). + * \param[in] z The destination's z coordinate (8 bits). + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Both of x+ and x- cannot be set at the same + * time...same with y and z. + * \param[in] vc The virtual channel that the packet must go + * into if it fails to win the bypass + * arbitration in the receiving node. + * - 0 = Virtual channel dynamic 0 + * - 1 = Virtual channel dynamic 1 + * - 2 = Virtual channel deterministic bubble + * - 3 = Virtual channel deterministic priority + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_inj_fifo_grp_id Injection fifo group ID where payload + * will be injected on destination node + * (0 to DMA_NUM_INJ_FIFO_GROUPS-1). + * \param[in] recv_inj_fifo_id Injection fifo ID (within the + * recv_inj_fifo_grp_id group) + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). 
+ * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_inj_fifo_grp_id: + * - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. + */ +int DMA_TorusRemoteGetDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int x, + unsigned int y, + unsigned int z, + unsigned int hints, + unsigned int vc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_inj_fifo_grp_id, + unsigned int recv_inj_fifo_id + ) +{ + + SPI_assert( desc != NULL ); + SPI_assert( (hints & 0x0000003F) == hints ); + SPI_assert( vc <= 3 ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + SPI_assert( recv_inj_fifo_grp_id < DMA_NUM_INJ_FIFO_GROUPS ); + SPI_assert( recv_inj_fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP ); + +#ifndef NDEBUG + + if ( personality_info.personalityRetrieved == 0 ) + { + DMA_GetPersonalityInfo(); + } + + SPI_assert( x < personality_info.xNodes ); + SPI_assert( y < personality_info.yNodes ); + SPI_assert( z < personality_info.zNodes ); + +#endif + + DMA_ZeroOutDescriptor(desc); + + desc->idma_counterId = + inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */ + + desc->base_offset = send_offset; + desc->msg_length = 32; + + /* Torus Headers */ + + desc->hwHdr.Sk = 1; /* Don't checksum this packet */ + + desc->hwHdr.Hint = hints; /* Hint Bits from caller */ + + DMA_SetDescriptorPids( desc, + recv_inj_fifo_grp_id 
); /* Pids based on recv fifo */ + /* group id */ + + desc->hwHdr.Chunks = 1; /* Size in Chunks of 32B 1 => 64 bytes */ + desc->hwHdr.Dm = 1; /* 1=DMA Mode, 0=Fifo Mode */ + + DMA_SetVc(desc,vc); /* Set virtual channel and dynamic */ + + desc->hwHdr.X = x; /* Destination coordinates */ + desc->hwHdr.Y = y; + desc->hwHdr.Z = z; + + desc->hwHdr.Flags = 0x1; /* Flags[7]=Remote-Get */ + desc->hwHdr.iDMA_Fifo_ID = /* Destination inj fifo ID */ + recv_inj_fifo_id + ( recv_inj_fifo_grp_id * DMA_NUM_INJ_FIFOS_PER_GROUP ); + +#ifdef DEBUG_MSG + Dump_InjDescriptor(desc); +#endif + + return 0; +} + + +/*! + * \brief Create a DMA Descriptor For a Local Remote-Get Message + * + * A local remote-get message is one whose data is directly put by the DMA into + * an injection fifo on the local node...it does not go into a reception fifo. + * Therefore, the payload of this message is one (or more) descriptors for + * another message that is to be injected on the local node. + * + * By default, we assume that the payload of this remote get packet is a single + * descriptor. Thus, Chunks = (2)-1 (64 byte packet) and msg_length = 32. + * For remote gets whose payload is greater than 1 descriptor, the caller can + * change the packet Chunks and msg_length after this function builds the + * default descriptor. + * + * A local remote-get DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 1 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = 32. + * + * - 8 byte torus hardware header + * - CSum_Skip = 0 (not used). + * - Sk = 0 (not used). + * - Hint = 0 (Set to caller's "hints". 
+ * - Pid0, Pid1 = Set based on caller's "recv_inj_fifo_id" (see note). + * - Chunks = Set to (2)-1 = 1. + * - Dm = 1 (Indicates a DMA packet). + * - Dynamic = 0 (not used). + * - VC = 0 (not used). + * - X,Y,Z = 0 (not used). + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA. + * - Flags = Pacing = 0. + * Remote-Get = 1. + * - iDMA_Fifo_ID = Injection fifo ID where the payload will be injected. + * Set based on caller's inj_ctr_grp_id and inj_ctr_id. + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_inj_fifo_grp_id Injection fifo group ID where payload + * will be injected on local node + * (0 to DMA_NUM_INJ_FIFO_GROUPS-1). + * \param[in] recv_inj_fifo_id Injection fifo ID (within the + * recv_inj_fifo_grp_id group) + * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1). 
+ * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the + * hardware packet header are determined by the recv_inj_fifo_grp_id: + * - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1) + * + */ +int DMA_LocalRemoteGetDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_inj_fifo_grp_id, + unsigned int recv_inj_fifo_id + ) +{ + + SPI_assert( desc != NULL ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + SPI_assert( recv_inj_fifo_grp_id < DMA_NUM_INJ_FIFO_GROUPS ); + SPI_assert( recv_inj_fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP ); + + DMA_ZeroOutDescriptor(desc); + + desc->local_memcopy = 1; /* 1 bit */ + + desc->idma_counterId = + inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */ + desc->base_offset = send_offset; + desc->msg_length = 32; + + /* Torus Headers */ + DMA_SetDescriptorPids( desc, + recv_inj_fifo_grp_id ); /* Pids based on recv fifo */ + /* group id */ + + desc->hwHdr.Chunks = 1; /* Size in Chunks of 32B 1 => 64 bytes */ + desc->hwHdr.Dm = 1; /* 1=DMA Mode, 0=Fifo Mode */ + + desc->hwHdr.Flags = 0x1; /* Flags[7]=Remote-Get */ + desc->hwHdr.iDMA_Fifo_ID = /* Destination inj fifo ID */ + recv_inj_fifo_id + ( recv_inj_fifo_grp_id * DMA_NUM_INJ_FIFOS_PER_GROUP ); + + return 0; +} + + +/*! + * \brief Create a DMA Descriptor For a Torus Memory Fifo Message + * + * A torus memory fifo message is one that is sent to another node and its data + * is put into a reception memory fifo by the DMA on the destination node. 
+ * + * A torus memory fifo DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = DMA_CSUM_SKIP. + * - Sk = DMA_CSUM_BIT. + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 0 (Indicates a memory fifo packet). + * - Dynamic = Set based on caller's "vc". + * - VC = Set to caller's "vc". + * - X,Y,Z = Set to caller's "x", "y", "z". + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (initialized to 0, and unchanged in the first packet. + * Increased by 240 in each subsequent packet, reflecting + * the number of bytes transferred in all previous + * packets). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = 0 (not used). + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg. + * - Func_Id = The registration ID of a function to receive control + * on the destination node to process the packet. + * Set to caller's function_id. + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] x The destination's x coordinate (8 bits). + * \param[in] y The destination's y coordinate (8 bits). + * \param[in] z The destination's z coordinate (8 bits). 
+ * \param[in] recv_fifo_grp_id Reception fifo group ID + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Both of x+ and x- cannot be set at the same + * time...same with y and z. + * \param[in] vc The virtual channel that the packet must go + * into if it fails to win the bypass + * arbitration in the receiving node. + * - 0 = Virtual channel dynamic 0 + * - 1 = Virtual channel dynamic 1 + * - 2 = Virtual channel deterministic bubble + * - 3 = Virtual channel deterministic priority + * \param[in] sw_arg User-defined 24 bits to be placed into the + * packets (bits 8-31). + * \param[in] function_id Function id (8 bit registration ID) of the + * function to receive control on the + * destination node to process packets for this + * message. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, all payload bytes are included in the torus injection + * checksum. In the first byte of the torus hardware packet header, + * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0. + * The defaults can be changed by changing DMA_CSUM_SKIP and + * DMA_CSUM_BIT in this include file. + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. 
For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_fifo_grp_id: + * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. +*/ +int DMA_TorusMemFifoDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int x, + unsigned int y, + unsigned int z, + unsigned int recv_fifo_grp_id, + unsigned int hints, + unsigned int vc, + unsigned int sw_arg, + unsigned int function_id, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int msg_len + ) +{ + int c; + + SPI_assert( desc != NULL ); + SPI_assert( (hints & 0x0000003F) == hints ); + SPI_assert( vc <= 3 ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + SPI_assert( recv_fifo_grp_id < DMA_NUM_REC_FIFO_GROUPS ); + +#ifndef NDEBUG + + if ( personality_info.personalityRetrieved == 0 ) + { + DMA_GetPersonalityInfo(); + } + + SPI_assert( x < personality_info.xNodes ); + SPI_assert( y < personality_info.yNodes ); + SPI_assert( z < personality_info.zNodes ); + +#endif + + DMA_ZeroOutDescriptor(desc); + + desc->idma_counterId = + inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */ + + desc->base_offset = send_offset; + desc->msg_length = msg_len; + + /* Torus Headers 
*/ + + desc->hwHdr.CSum_Skip = DMA_CSUM_SKIP; /* Checksum all but header */ + desc->hwHdr.Sk = DMA_CSUM_BIT; /* Checksum entire packet */ + desc->hwHdr.Hint = hints; /* Hint Bits from caller */ + + DMA_SetDescriptorPids( desc, + recv_fifo_grp_id ); /* Pids based on recv group id */ + + c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first */ + /* packet. */ + SPI_assert( c!=0 ); + desc->hwHdr.Chunks = c - 1; /* Packet header has 0 for 1 chunk, ... , */ + /* 7 for 8 chunks). */ + + DMA_SetVc( desc, + vc ); /* Virtual channel & Dynamic. */ + + desc->hwHdr.X = x; /* Destination coordinates */ + desc->hwHdr.Y = y; + desc->hwHdr.Z = z; + + desc->hwHdr.SW_Arg = sw_arg; /* User-defined */ + desc->hwHdr.Func_Id = function_id; /* Registration id */ + +#ifdef DEBUG_MSG + Dump_InjDescriptor(desc); +#endif + + return 0; +} + + +/*! + * \brief Create a DMA Descriptor For a Local Memory Fifo Message + * + * A local memory fifo message is one whose data is put into a reception + * memory fifo on the same node by the DMA. + * + * A local memory fifo DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 1 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = 0 (not used). + * - Sk = 0 (not used). + * - Hint = 0 (not used). + * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 0 (Indicates a memory fifo packet). + * - Dynamic = 0 (not used). + * - VC = 0 (not used). + * - X,Y,Z = 0 (not used). 
+ * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = 0 (not used). + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg. + * - Func_Id = The registration ID of a function to receive control + * on this local node to process the packet. + * Set to caller's function_id. + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] recv_fifo_grp_id Reception fifo group ID + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * \param[in] sw_arg User-defined 24 bits to be placed into the + * packets (bits 8-31). + * \param[in] function_id Function id (8 bit registration ID) of the + * function to receive control on this + * local node to process packets for this + * message. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. 
+ * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_fifo_grp_id: + * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1) +*/ +int DMA_LocalMemFifoDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int recv_fifo_grp_id, + unsigned int sw_arg, + unsigned int function_id, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int msg_len + ) +{ + int c; + + SPI_assert( desc != NULL ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + SPI_assert( recv_fifo_grp_id < DMA_NUM_REC_FIFO_GROUPS ); + + DMA_ZeroOutDescriptor(desc); + + desc->local_memcopy = 1; /* 1 bit */ + + desc->idma_counterId = + inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */ + + desc->base_offset = send_offset; + desc->msg_length = msg_len; + + /* Torus Headers */ + DMA_SetDescriptorPids( desc, + recv_fifo_grp_id ); /* Pids based on recv group id */ + + c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first */ + /* packet. */ + SPI_assert( c!=0 ); + desc->hwHdr.Chunks = c - 1; /* Packet header has 0 for 1 chunk, ... , */ + /* 7 for 8 chunks). */ + + desc->hwHdr.SW_Arg = sw_arg; /* User-defined */ + desc->hwHdr.Func_Id = function_id; /* Registration id */ + +#ifdef DEBUG_MSG + Dump_InjDescriptor(desc); +#endif + + return 0; +} + + +/*! + * \brief Create a DMA Descriptor For a Torus Direct Put Broadcast Message + * + * A torus direct put broadcast message is one that is sent to all of the nodes + * in a specified direction along a specified line, its data + * is directly put into memory on the nodes along that line by the DMA on those + * nodes...it does not go into a reception fifo. 
Only one hint bit can be + * specified, dictating the direction (plus or minus) and line (x, y, or z). + * + * By default, the packet is included in the checksum. Retransmitted packets + * should not be included in the checksum. + * + * By default, the deterministic bubble normal virtual channel is used. + * + * A torus direct-put broadcast DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = DMA_CSUM_SKIP. + * - Sk = DMA_CSUM_BIT. + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 1 (Indicates a direct-put packet). + * - Dynamic = 0 (Deterministic). + * - VC = Virtual Channel: Deterministic Bubble Normal. + * - X,Y,Z = Set according to the hints: + * Two of the directions are set to this node's + * coordinates (no movement in those directions). + * One direction is set to the dest specified + * by the caller. + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = Destination message offset (from the reception + * counter's base address). Set to caller's recv_offset. + * - rDMA_Counter = Reception counter ID. This counter is located on the + * destination node and contains the base address of the + * message and the message length. Set based on caller's + * recv_ctr_grp_id and recv_ctr_id. + * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA. + * - Flags = Pacing = 0. 
+ * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - Func_Id = 0 (not used). + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] dest The final torus destination coordinate + * along the line specified by the hints. + * Should not exceed the number of nodes in + * the direction of travel. + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Only one bit may be specified. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] recv_ctr_grp_id Reception counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] recv_ctr_id Reception counter ID (within the recv counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] recv_offset Offset of the payload from the pa_base + * associated with the specified reception + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, all payload bytes are included in the torus injection + * checksum. In the first byte of the torus hardware packet header, + * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0. + * The defaults can be changed by changing DMA_CSUM_SKIP and + * DMA_CSUM_BIT in this include file. + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. 
For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. + * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_ctr_grp_id: + * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. +*/ +int DMA_TorusDirectPutBcastDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int dest, + unsigned int hints, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int recv_ctr_grp_id, + unsigned int recv_ctr_id, + unsigned int recv_offset, + unsigned int msg_len + ) +{ + + int dest_x,dest_y,dest_z; + + SPI_assert( desc != NULL ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( recv_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + + /* + * Previous code to retrieve our node's x,y,z coords: + * BGLPartitionGetCoords( &dest_x, &dest_y, &dest_z ); + * + * If the node's x,y,z coordinates have not yet been retrieved from the + * personality, go get the personality and set the DMA_NodeXCoordinate, + * DMA_NodeYCoordinate, and DMA_NodeZCoordinate static variables from + * the personality info. Then, use this to init the dest_x,y,z variables. 
+ */ + if ( personality_info.personalityRetrieved == 0 ) + { + DMA_GetPersonalityInfo(); + } + + dest_x = personality_info.nodeXCoordinate; + dest_y = personality_info.nodeYCoordinate; + dest_z = personality_info.nodeZCoordinate; + + /* + * Examine the hint bits specified by the caller: + * - Ensure only one of them is specified + * - Ensure dest is valid for the direction of the broadcast + * - Override x, y, or z with dest for the specified direction + */ + + switch(hints) { + + case DMA_PACKET_HINT_XP: + case DMA_PACKET_HINT_XM: + dest_x = dest; + SPI_assert( dest <= personality_info.xNodes ); + break; + + case DMA_PACKET_HINT_YP: + case DMA_PACKET_HINT_YM: + dest_y = dest; + SPI_assert( dest <= personality_info.yNodes ); + break; + + case DMA_PACKET_HINT_ZP: + case DMA_PACKET_HINT_ZM: + dest_z = dest; + SPI_assert( dest <= personality_info.zNodes ); + break; + + default: + SPI_assert(0); + + } + + /* Build the descriptor */ + DMA_TorusDirectPutDescriptor(desc, + dest_x, + dest_y, + dest_z, + hints, + DMA_PACKET_VC_BN, + inj_ctr_grp_id, + inj_ctr_id, + send_offset, + recv_ctr_grp_id, + recv_ctr_id, + recv_offset, + msg_len); + + /* set the deposit bit */ + desc->hwHdr.Dp =1; + + + return 0; +} + + + + +/*! + * \brief Create a DMA Descriptor For a Torus Memory Fifo Broadcast Message + * + * A torus memory fifo broadcast message is one that is sent to all of the nodes + * in a specified direction along a specified line, its data is + * put into a reception memory fifo by the DMA on the destination nodes along + * that line. Only one hint bit can be specified, dictating the direction + * (plus or minus) and line (x, y, or z). + * + * By default, the packet is included in the checksum. Retransmitted packets + * should not be included in the checksum. + * + * By default, the deterministic bubble normal virtual channel is used. 
+ * + * A torus memory fifo broadcast DMA descriptor contains the following: + * + * - 16 bytes of control information: + * - prefetch_only = 0 + * - local_memcopy = 0 + * - idma_counterId = Injection counter ID associated with the data being + * sent. This counter contains the base address of the + * message and the message length. Set based on caller's + * inj_ctr_grp_id and inj_ctr_id. + * - base_offset = Message offset (from the injection counter's base + * address). Set to caller's send_offset. + * - msg_length = Message length. Set to caller's msg_len. + * + * - 8 byte torus hardware header + * - CSum_Skip = DMA_CSUM_SKIP. + * - Sk = DMA_CSUM_BIT. + * - Hint = Set to caller's "hints". + * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note). + * - Chunks = Set to largest size consistent with msg_len. + * - Dm = 0 (Indicates a memory fifo packet). + * - Dynamic = 0 (Deterministic). + * - VC = Virtual Channel: Deterministic Bubble Normal. + * - X,Y,Z = Set according to the hints: + * Two of the directions are set to this node's + * coordinates (no movement in those directions). + * One direction is set to the dest specified + * by the caller. + * + * - 8 byte software header (initial values used by iDMA). + * - Put_Offset = 0 (not used). + * - rDMA_Counter = 0 (not used). + * - Payload_Bytes = 0 (not used). + * - Flags = Pacing = 0. + * Remote-Get = 0. + * - iDMA_Fifo_ID = 0 (not used). + * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg. + * - Func_Id = The registration ID of a function to receive control + * on the destination node to process the packet. + * Set to caller's function_id. + * + * This function creates the above descriptor. + * + * \param[in,out] desc Pointer to the storage where the descriptor + * will be created. + * \param[in] dest The final torus destination coordinate + * along the line specified by the hints. + * Should not exceed the number of nodes in + * the direction of travel. 
+ * \param[in] recv_fifo_grp_id Reception fifo group ID + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * \param[in] hints Hint bits for torus routing (6 bits). + * Each bit corresponds to x+, x-, y+, y-, + * z+, z-. If a bit is set, it indicates that + * the packet wants to travel along the + * corresponding direction. If all bits are + * zero, the hardware calculates the hint bits. + * Only one bit may be specified. + * \param[in] sw_arg User-defined 24 bits to be placed into the + * packets (bits 8-31). + * \param[in] function_id Function id (8 bit registration ID) of the + * function to receive control on the + * destination node to process packets for this + * message. + * \param[in] inj_ctr_grp_id Injection counter group ID + * (0 to DMA_NUM_COUNTER_GROUPS-1). + * \param[in] inj_ctr_id Injection counter ID (within the inj counter + * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1). + * \param[in] send_offset Offset of the send payload from the pa_base + * associated with the specified injection + * counter. + * \param[in] msg_len Total message length (in bytes). + * + * \retval 0 Success + * \retval non-zero Failure + * + * \note By default, all payload bytes are included in the torus injection + * checksum. In the first byte of the torus hardware packet header, + * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0. + * The defaults can be changed by changing DMA_CSUM_SKIP and + * DMA_CSUM_BIT in this include file. + * + * \note By default, the packet size is set to the largest value consistent + * with the message size. For example, + * - if msg_len >= 209, there will be 8 32-byte chunks in each packet, + * with the possible exception of the last packet, which could contain + * fewer chunks (209... of payload + 16 header). + * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc. 
+ * + * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the + * torus hardware packet header are determined by the recv_fifo_grp_id: + * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0) + * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1) + * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0) + * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1) + * Pid0 determines into which physical torus fifo group on the destination + * node the packet is put, prior to the dma receiving it. Other than that, + * the only use for the pid bits is for debug, ie, if headers are being + * saved. +*/ +int DMA_TorusMemFifoBcastDescriptor( + DMA_InjDescriptor_t *desc, + unsigned int dest, + unsigned int recv_fifo_grp_id, + unsigned int hints, + unsigned int sw_arg, + unsigned int function_id, + unsigned int inj_ctr_grp_id, + unsigned int inj_ctr_id, + unsigned int send_offset, + unsigned int msg_len + ) +{ + int dest_x,dest_y,dest_z; + + SPI_assert( desc != NULL ); + SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS ); + SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP ); + SPI_assert( recv_fifo_grp_id < DMA_NUM_COUNTER_GROUPS ); + + /* + * Previous code to retrieve our node's x,y,z coords: + * BGLPartitionGetCoords( &dest_x, &dest_y, &dest_z ); + * + * If the node's x,y,z coordinates have not yet been retrieved from the + * personality, go get the personality and set the DMA_NodeXCoordinate, + * DMA_NodeYCoordinate, and DMA_NodeZCoordinate static variables from + * the personality info. Then, use this to init the dest_x,y,z variables. 
+ */ + if ( personality_info.personalityRetrieved == 0 ) + { + DMA_GetPersonalityInfo(); + } + + dest_x = personality_info.nodeXCoordinate; + dest_y = personality_info.nodeYCoordinate; + dest_z = personality_info.nodeZCoordinate; + + /* + * Examine the hint bits specified by the caller: + * - Ensure only one of them is specified + * - Ensure dest is valid for the direction of the broadcast + * - Override x, y, or z with dest for the specified direction + */ + + switch(hints) { + + case DMA_PACKET_HINT_XP: + case DMA_PACKET_HINT_XM: + dest_x = dest; + SPI_assert( dest <= personality_info.xNodes ); + break; + + case DMA_PACKET_HINT_YP: + case DMA_PACKET_HINT_YM: + dest_y = dest; + SPI_assert( dest <= personality_info.yNodes ); + break; + + case DMA_PACKET_HINT_ZP: + case DMA_PACKET_HINT_ZM: + dest_z = dest; + SPI_assert( dest <= personality_info.zNodes ); + break; + + default: + SPI_assert(0); + + } + + /* Build the descriptor */ + DMA_TorusMemFifoDescriptor( + desc, + dest_x, + dest_y, + dest_z, + recv_fifo_grp_id, + hints, + DMA_PACKET_VC_BN, + sw_arg, + function_id, + inj_ctr_grp_id, + inj_ctr_id, + send_offset, + msg_len); + + /* set the deposit bit */ + desc->hwHdr.Dp =1; + + + return 0; +} +EXPORT_SYMBOL(DMA_GetPersonalityInfo) ; +EXPORT_SYMBOL(DMA_TorusDirectPutDescriptor) ; +EXPORT_SYMBOL(DMA_LocalDirectPutDescriptor) ; +EXPORT_SYMBOL(DMA_LocalPrefetchOnlyDescriptor) ; +EXPORT_SYMBOL(DMA_TorusRemoteGetDescriptor) ; +EXPORT_SYMBOL(DMA_LocalRemoteGetDescriptor) ; +EXPORT_SYMBOL(DMA_TorusMemFifoDescriptor) ; +EXPORT_SYMBOL(DMA_LocalMemFifoDescriptor) ; +EXPORT_SYMBOL(DMA_TorusDirectPutBcastDescriptor) ; +EXPORT_SYMBOL(DMA_TorusMemFifoBcastDescriptor) ; diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c b/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c new file mode 100644 index 00000000000000..34c85501a121b2 --- /dev/null +++ b/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c @@ -0,0 +1,206 @@ +/********************************************************************* + * 
+ * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/*! \file DMA_InjFifo.c + * + * \brief Implementations for Functions Defined in bgp/arch/include/spi/DMA_InjFifo.h. + * + */ + +#undef DEBUG_PRINT +/* #define DEBUG_PRINT 1 */ + +#ifndef __LINUX_KERNEL__ + +#include <common/bgp_personality_inlines.h> +#include <spi/bgp_SPI.h> +#include <stdio.h> +#include <errno.h> + +#else + +#include <spi/linux_kernel_spi.h> + +#endif /* ! __LINUX_KERNEL__ */ + +/*! + * + * \brief Remote Get Fifo Full Handler Table + * + * An array of entries, one per injection fifo. Each entry specifies the fifo + * group structure and the handler function that will receive control to + * handle a remote get fifo full condition for fifos in that fifo group. + */ +DMA_InjFifoRgetFifoFullHandlerEntry_t DMA_RgetFifoFullHandlerTable[DMA_NUM_INJ_FIFOS]; + + +/*! + * \brief Remote Get Fifo Full Init Has Been Done Indicator + * + * 0 means the initialization has not been done. + * 1 means the initialization has been done. + */ +int DMA_InjFifoRgetFifoFullInitHasBeenDone = 0; + + +/*! + * \brief Pointer to Barrier function Used By Remote Get Fifo Full Interrupt Handler + */ +static void (*DMA_RgetFifoFullHandlerBarrierFcn)(void *); +/*! 
+ * \brief Generic arg for Barrier function + */ +static void *DMA_RgetFifoFullHandlerBarrierArg; + + +/*! + * \brief Remote Get Fifo Full Interrupt Handler + * + * This function receives control when a remote get fifo becomes full + * It attempts to recover from the condition and restart the DMA. + * It receives control in all cores (a broadcast interrupt). + * + * Upon entry, the DMA is assumed to have been stopped, both the iDMA + * and the rDMA. This has been done by the kernel's interrupt + * handler that invoked this function. + */ +void DMA_InjFifoRgetFifoFullInterruptHandler(uint32_t arg1, + uint32_t arg2, + uint32_t arg3, + uint32_t arg4) +{ + uint32_t global_fnum, freeSpaceInBytes; + uint32_t core_num = Kernel_PhysicalProcessorID(); + + /* If Init has not been done yet, ignore the interrupt. + */ + if ( DMA_InjFifoRgetFifoFullInitHasBeenDone == 0 ) + { + pthread_poof_np(); /* Return from this interrupt. */ + } + + /* + * Barrier across all cores. This is needed to ensure that + * 1. The DMA has been stopped (only the last core to see this interrupt + * stops the DMA). + * 2. We don't exit from this handler until the core that needs to handle + * the rget fifo full condition has cleared the condition causing the + * interrupt, or else it will fire right away again. + * + * This barrier, while allocated by the main core of each process on the + * compute node, has been modified during DMA SPI Setup to expect the + * appropriate number of cores to participate. + */ + + DMA_RgetFifoFullHandlerBarrierFcn( DMA_RgetFifoFullHandlerBarrierArg ); + + /* + * For each injection fifo... + * For each entry of the RgetFifoFullHandlerTable that is managed + * by our core and has a registered rget fifo full handler, + * 1. Determine whether this rget fifo is full (or nearly so) + * 2. If full, call the registered handler to handle the condition. 
+ */ + for ( global_fnum=0; global_fnum<DMA_NUM_INJ_FIFOS; global_fnum++) + { + if ( ( DMA_RgetFifoFullHandlerTable[global_fnum].core_num == core_num ) && + ( DMA_RgetFifoFullHandlerTable[global_fnum].handler ) ) + { + /* The rget fifo is considered full (or nearly so) if there is + * only enough freespace in the fifo to hold one descriptor or less. + */ + freeSpaceInBytes = + DMA_InjFifoGetFreeSpaceById ( + DMA_RgetFifoFullHandlerTable[global_fnum].fg_ptr, + global_fnum & 0x1f, /* relative fifo number */ + 1, + 1) << 4; + if ( freeSpaceInBytes <= (DMA_MIN_INJECT_SIZE_IN_QUADS*16) + + DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES ) + { + /* + * Call the handler function to free up space in the fifo, + * if possible. + */ + + (*(DMA_RgetFifoFullHandlerTable[global_fnum].handler))( + DMA_RgetFifoFullHandlerTable[global_fnum].fg_ptr, + global_fnum & 0x1F, + DMA_RgetFifoFullHandlerTable[global_fnum].handler_parm); + } + } + } + + /* + * Barrier. Wait here until all cores reach this point in the interrupt + * handler. + */ + + DMA_RgetFifoFullHandlerBarrierFcn( DMA_RgetFifoFullHandlerBarrierArg ); + + /* + * Exit from the interrupt. + */ + pthread_poof_np(); +} + +/*! + * \brief Remote Get Fifo Full Initialization + * + * Initialize data structures and interrupt handlers to handle a remote get + * fifo full condition. + */ +void DMA_InjFifoRgetFifoFullInit( Kernel_InterruptGroup_t rget_interruptGroup, + void (*rget_barrier)(void *) , + void *rget_barrier_arg + ) +{ + int i; + + /* + * Clear the handler table. + */ + for ( i=0; i<DMA_NUM_INJ_FIFOS; i++ ) + { + DMA_RgetFifoFullHandlerTable[i].fg_ptr = NULL; + DMA_RgetFifoFullHandlerTable[i].handler = NULL; + DMA_RgetFifoFullHandlerTable[i].handler_parm = NULL; + DMA_RgetFifoFullHandlerTable[i].core_num = 0; + } + + /* + * Clear the lockbox counter associated with this interrupt. + * The lockbox keeps track of which cores have entered and exited + * the kernel's interrupt handler. 
+ */ + LockBox_FetchAndClear( rget_interruptGroup ); + + DMA_RgetFifoFullHandlerBarrierFcn = rget_barrier; + DMA_RgetFifoFullHandlerBarrierArg = rget_barrier_arg; + + /* + * Register the interrupt handler to handle the remote get + * fifo full condition. + */ + Kernel_SetCommThreadConfig(Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G3_HIER_POS, 24), + COMMTHRD_OPCODE_BCAST | + COMMTHRD_OPCODE_CALLFUNC, + rget_interruptGroup, + DMA_InjFifoRgetFifoFullInterruptHandler, + 0, 0, 0, 0); +} diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c b/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c new file mode 100644 index 00000000000000..c97fd8f13eed75 --- /dev/null +++ b/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c @@ -0,0 +1,3016 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2006,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + ********************************************************************/ +/*! \file DMA_RecFifo.c + * + * \brief Implementations for Functions Defined in bgp/arch/include/spi/DMA_RecFifo.h. 
+ * + */ +#include <linux/version.h> +#include <linux/module.h> +#include <asm/bitops.h> + +#undef DEBUG_PRINT +/* #define DEBUG_PRINT 1 */ + +#ifndef __LINUX_KERNEL__ + +#include <spi/DMA_RecFifo.h> +#include <stdio.h> +#include <bpcore/ppc450_inlines.h> +#include <bpcore/ic_memmap.h> +#include <common/bgp_bitnumbers.h> +#include <errno.h> + +#else + +#include <spi/linux_kernel_spi.h> +/* Interrupt encoding for Blue Gene/P hardware). + * Given a BIC group and bit index within the group, + * bic_hw_to_irq(group, gint) returns the Linux IRQ number. + * ( really from asm/bluegene.h but we get mismatches if we include it) + */ + +#endif /* ! __LINUX_KERNEL__ */ + +#include <linux/dma-mapping.h> + +#define TRACE(x) printk x + + +#if defined(BGP_DD1_WORKAROUNDS) + +/*! + * \brief Number of times the poll functions have been called and returned + * no packets processed. + * + * Special Value: -1 means that the Kernel_ClearFullReceptionFifo() syscall + * has been invoked, but no packets have been processed + * since. This tells the poll function that even if it + * does not process any packets, it should not increment + * this counter and ultimately issue the syscall again, because + * there is no need. + */ +int NumEmptyPollFunctionCalls = -1; + +/*! 
+ * \brief Limit for NumEmptyPollFunctionCalls + */ +const int NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT = 10; + +#endif + +#if defined(CONFIG_BGP_STATISTICS) +int reception_fifo_histogram[33] ; +unsigned int reception_hi_watermark ; +#endif +static inline int get_tlb_pageid(int tlbindex) + { + int rc ; + /* PPC44x_TLB_PAGEID is 0 */ + asm volatile( "tlbre %[rc],%[index],0" + : [rc] "=r" (rc) + : [index] "r" (tlbindex) + ) ; + return rc ; + } + +static inline int get_tlb_xlat(int tlbindex) + { + int rc ; + /* PPC44x_TLB_XLAT is 1 */ + asm volatile( "tlbre %[rc],%[index],1" + : [rc] "=r" (rc) + : [index] "r" (tlbindex) + ) ; + return rc ; + } + +static inline int get_tlb_attrib(int tlbindex) + { + int rc ; + /* PPC44x_TLB_ATTRIB is 2 */ + asm volatile( "tlbre %[rc],%[index],2" + : [rc] "=r" (rc) + : [index] "r" (tlbindex) + ) ; + return rc ; + } + +static inline int search_tlb(unsigned int vaddr) + { + int rc ; + /* PPC44x_TLB_ATTRIB is 2 */ + asm volatile( "tlbsx %[rc],0,%[vaddr]" + : [rc] "=r" (rc) + : [vaddr] "r" (vaddr) + ) ; + return rc ; + } + +static void show_tlbs(unsigned int mioaddr) __attribute__((unused)) ; +static void show_tlbs(unsigned int mioaddr) +{ + int i ; + int tlb_index = search_tlb(mioaddr) ; + for(i=0;i<64;i+=1) + { + int pageid=get_tlb_pageid(i) ; + int xlat=get_tlb_xlat(i) ; + int attrib=get_tlb_attrib(i) ; + if( pageid & 0x00000200) + { + printk(KERN_INFO "tlb[%02d]=[%08x %08x %08x]\n",i,pageid,xlat,attrib) ; + } + } + printk(KERN_INFO "mioaddr=0x%08x tlb_index=%d\n", mioaddr,tlb_index) ; +} + +/* char temp_packet[256] __attribute__ ((aligned ( 16))) ; */ + +/*! + * \brief DMA Reception Fifo Shared Memory Structure + * + * This structure must be shared among the processors in a compute node. It + * contains info that must be maintained and shared for the duration of a job. + * This storage is static, maintained across function calls. + * In sharedmemory mode, core 0 maintains this info. + * In virtual node mode, each core maintains its own info. 
+ * + */ +typedef struct DMA_RecFifoSharedMemory_t +{ + DMA_RecFifoRecvFunction_t recvFunctions[256]; /*!< The registered "normal" + reception fifo receive functions. + Filled in by calls to + DMA_RecFifoRegisterRecvFunction(). */ + + void *recvFunctionsParms[256]; /*!< recvFunctionsParms[i] is the + parameter to pass to + recvFunctions[i]. + Filled in by calls to + DMA_RecFifoRegisterRecvFunction(). */ + + DMA_RecFifoRecvFunction_t headerRecvFunction; /*!< The registered "header" + reception fifo receive function. + Filled in by a call to + DMA_RecFifoRegisterRecvFunction(). */ + + void *headerRecvFunctionParm; /*!< The parameter to pass to + headerRecvFunction. + Filled in by a call to + DMA_RecFifoRegisterRecvFunction(). */ + + DMA_RecFifoRecvFunction_t errorRecvFunction; /*!< The registered "error" + reception fifo receive function. + Defaulted to + &DMA_RecFifoDefaultErrorRecvFunction. + Filled in by a call to + DMA_RecFifoRegisterRecvFunction(). */ + + void *errorRecvFunctionParm; /*!< The parameter to pass to + errorRecvFunction. + Filled in by a call to + DMA_RecFifoRegisterRecvFunction(). */ + + DMA_RecFifoGroup_t groups[DMA_NUM_REC_FIFO_GROUPS]; /*!< Reception fifo + group structures, one for each group. + groups[i] is the group shared by all + users of reception fifo group i. */ + + unsigned int groupsInitialized[DMA_NUM_REC_FIFO_GROUPS]; /*!< Indicator of + groups[i] having been initialized. + 0 = not initialized by a call to + DMA_RecFifoGetFifoGroup() for + group i. + 1 = initialized. */ + +} DMA_RecFifoSharedMemory_t; + + +/*! + * \brief Storage for the Reception Fifo Shared Memory Structure + * + * This storage is static, maintained across function calls. + * In sharedmemory mode, core 0 maintains reception fifo info. + * In virtual node mode, each core maintains its own reception fifo info. + */ +static DMA_RecFifoSharedMemory_t DMA_RecFifoInfo; + + +/*! 
+ * \brief DMA Packet I/O Vector Structure + * + * This structure describes the payload of a memory fifo packet. + * Because of fifo wrapping, the payload may consist of 0, 1, or 2 segments: + * - 0 segments: this is a packet in the header-only, debug fifo. + * - 1 segment: the packet does not wrap the fifo. + * - 2 segments: the packet does wrap the fifo. + * + */ +typedef struct DMA_PacketIovec_t +{ + int num_segments; /*!< Number of segments in the payload */ + void *payload_ptr[2] ; /*!< Pointer to the payloads in each segment (NULL + if not used). */ + int num_bytes[2]; /*!< Number of payload bytes in each segment (0 if + not used). */ +} +ALIGN_L1D_CACHE DMA_PacketIovec_t; + + +static void dumpmem(const void *address, unsigned int length, const char * label) + { + int x ; + printk(KERN_INFO "(>)[%s:%d] Memory dump: %s\n",__func__, __LINE__,label) ; + for (x=0;x<length;x+=32) + { + int *v = (int *)(address+x) ; + printk(KERN_INFO "%p: %08x %08x %08x %08x %08x %08x %08x %08x\n", + v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7] + ) ; + } + printk(KERN_INFO "(<)[%s:%d] Memory dump\n",__func__, __LINE__) ; + } + + + +/*! + * \brief Get DMA Reception Fifo Group + * + * This is a wrapper around a System Call. This function returns THE + * one-and-only pointer to the fifo group structure, with the entries all + * filled in from info in the DCRs. If called multiple times with the same + * group, it will always return the same pointer, and the system call will + * not be invoked again. + * + * It must be called AFTER DMA_RecFifoSetMap(). + * + * By convention, the same "target" is used for normal and header fifo + * interrupts (could be changed). In addition, by convention, interrupts for + * fifos in group g come out of the DMA as non-fatal irq bit 28+g, + * ie, only fifos in group g can cause the "type g" threshold interrupts. + * + * \param[in] grp The group number (0 through DMA_NUM_REC_FIFO_GROUPS). 
+ * \param[in] target The core that will receive the interrupt when a + * fifo in this group reaches its threshold + * (0 to DMA_NUM_REC_FIFO_GROUPS-1). + * Ignored on subsequent call with the same group. + * \param[in] normal_handler A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * normal fifo in this group reaches its threshold. + * This function must be coded to take 4 uint32_t + * parameters: + * - A pointer to storage specific to this + * handler. This is the normal_handler_parm + * specified on this function call. + * - 3 uint32_t parameters that are not used. + * If normal_handler is NULL, threshold interrupts + * are not delivered for normal fifos in this group. + * Ignored on subsequent call with the same group. + * \param[in] normal_handler_parm A pointer to storage that should be passed + * to the normal interrupt handling function + * (see normal_handler parameter). + * Ignored on subsequent call with the same + * group. + * \param[in] header_handler ** This parameter is deprecated. Specify NULL.** + * A pointer to the function to receive control in + * the I/O thread to handle the interrupt when a + * header fifo in this group reaches its threshold. + * This function must be coded to take 2 parameters: + * void* A pointer to storage specific to this + * handler. This is the header_handler_parm + * specified on this function call. + * int The global fifo ID of the fifo that hit + * its threshold (0 through + * NUM_DMA_REC_FIFOS-1). + * If header_handler is NULL, threshold interrupts + * are not delivered for header fifos in this group. + * Ignored on subsequent call with the same group. + * \param[in] header_handler_parm ** This parameter is deprecated. Specify + * NULL. ** + * A pointer to storage that should be passed + * to the header interrupt handling function + * (see header_handler parameter). + * Ignored on subsequent call with the same + * group. 
+ * \param[in] interruptGroup A InterruptGroup_t that identifies the + * group of interrupts that the fifos in this group + * will become part of. + * Ignored on subsequent call with the same group. + * + * \return RecFifoGroupStruct Pointer to a DMA Reception Fifo Group structure + * that reflects the fifos that are being used in + * this group. This same structure is shared by + * all users of this reception fifo group. + * NULL is returned if an error occurs. + * + * \note The following comments from Phil about the internals of the syscall: + * - error checks + * - 0 <= group_id < 4 + * - the start of the fifo group is a valid virtual address (tlb mapped)? + * - disable the rDMA + * - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information + * - loop through the map to determine how many and which fifos in this group + * are used, including headers + * - filling in the addresses of used fifos + * - In particular, any pointer to any fifo in the group that is not used + * will have a null pointer + * - furthermore, + * - write starting values to all used fifos + * - make sure all interrupts are cleared + * - enable rDMA + * + */ +DMA_RecFifoGroup_t * +DMA_RecFifoGetFifoGroup( + int grp, + int target, + Kernel_CommThreadHandler normal_handler, + void *normal_handler_parm, + Kernel_CommThreadHandler header_handler, + void *header_handler_parm, + Kernel_InterruptGroup_t interruptGroup + ) +{ + int rc; + + TRACE(( + KERN_INFO "(>) DMA_RecFifoGetFifoGroup\n")); + + SPI_assert( (0 <= grp ) && (grp < DMA_NUM_REC_FIFO_GROUPS ) ); + SPI_assert( (0 <= target) && (target < DMA_NUM_REC_FIFO_GROUPS ) ); + + if ( DMA_RecFifoInfo.groupsInitialized[grp] == 0 ) /* Is */ + /* DMA_RecFifoGroups[grp] not */ + /* filled-in yet? */ + { + /* + * If an interrupt handler has been specified, invoke the system call + * to configure the kernel to invoke the handler when the reception + * fifo threshold crossed interrupt fires. 
+ */ + + if (normal_handler) + { + { + /* + * Calculate the IRQ to be one of + * - 28: rec fifo type 0 crossed threshold + * - 29: rec fifo type 0 crossed threshold + * - 30: rec fifo type 0 crossed threshold + * - 31: rec fifo type 0 crossed threshold + * based on the DMA group number. + */ + unsigned irqInGroup = 28 + grp; +/* tjcw ???? not sure what gets the right interrupts ... */ +/* 28+ gives something to do with memory tranfers. */ +/* we want 8+, which is related to FIFO fullness */ +/* unsigned irqInGroup = 8 + grp; */ + + /* + * Calculate an interrupt ID, which is the BIC interrupt group (2) + * combined with the IRQ number. + */ +/* int interruptID = Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G2_HIER_POS, */ +/* irqInGroup); */ + int interruptID = bic_hw_to_irq(_BGP_IC_DMA_NFT_G2_HIER_POS,irqInGroup); + + /* + * Calculate the opcode indicating + * - the target core for interrupt + * - to call the specified function when the interrupt fires + * - to disable interrupts before calling the specified function + * - to enable interrupts after callling the specified function + */ + int opcode = ( COMMTHRD_OPCODE_CORE0 + target ) | + COMMTHRD_OPCODE_CALLFUNC | + COMMTHRD_OPCODE_DISABLEINTONENTRY | + COMMTHRD_OPCODE_ENABLEINTONPOOF ; + + /* + * Configure this interrupt with the kernel. + */ + TRACE(( + KERN_INFO "(=) DMA_RecFifoGetFifoGroup interruptID=%d\n",interruptID)); + rc = Kernel_SetCommThreadConfig(interruptID, + opcode, + (uint32_t*)interruptGroup, + normal_handler, + (uint32_t)normal_handler_parm, + (uint32_t)NULL, + (uint32_t)NULL, + (uint32_t)NULL); + if (rc) return NULL; + } + + /* + * Proceed to get the reception fifo group + */ + rc = Kernel_RecFifoGetFifoGroup( (uint32_t*)&(DMA_RecFifoInfo.groups[grp]), + grp, + target, + (uint32_t) NULL, /* Normal handler. Not used */ + (uint32_t) NULL, /* Normal handler parm. Not used */ + (uint32_t) NULL, /* Header handler. Not used */ + (uint32_t) NULL, /* Header handler parm. 
Not used */ + (uint32_t) NULL /* InterruptGroup. Not used */ + ); + if ( rc == 0 ) /* Success? */ + { + DMA_RecFifoInfo.groupsInitialized[grp] = 1; /* Remember success. */ + } + else + { + return NULL; /* Failure */ + } + } + } + TRACE(( + KERN_INFO "(<) DMA_RecFifoGetFifoGroup\n")); + + return &(DMA_RecFifoInfo.groups[grp]); /* Return the pointer. */ + +} + + +/*! + * \brief Register a Reception Fifo Receive Function + * + * Register a specified receive function to handle packets having a specific + * "registration ID". It returns a registration ID (0-255) that is to be used + * in the packet header Func_Id field, such that packets that arrive in a + * reception fifo will result in the corresponding receive function being called + * when that fifo is processed by a polling or interrupt handler function. + * + * \param[in] recv_func Pointer to the receive function. + * \param[in] recv_func_parm Arbitrary pointer to be passed to the + * recv_func when it is called. + * \param[in] is_error_function 1 means this is the receiver function + * to be called if a packet contains an invalid + * (unregistered) registration ID. The return + * value from this function is zero, indicating + * success, not indicating a registration ID. + * A default function is provided if one is not + * registered. If there is already a non-default + * error receive function registered, -EBUSY is + * returned. + * 0 means this is not the error receiver + * function. + * \param[in] is_header_fifo Indicates whether the fifo is normal or + * header: + * - 0 is normal. The return code is the + * registration ID. + * - 1 is header. The return code is 0, + * indicating success, because packets in + * header fifos are direct-put packets, and + * hence have no registration ID. + * If there is already a header receive function + * registered, -EBUSY is returned. + * + * If both is_error_function and is_header_fifo are 1, -EINVAL is returned. 
+ * + * \retval 0 This is a registration ID if is_error_function=0 and + * is_header_fifo=0. Otherwise, it indicates success. + * 1-255 This is a registration ID. Successful. + * negative Failure. This is a negative errno value. + * + * \see DMA_RecFifoDeRegisterRecvFunction + */ +static int DMA_RecFifoRegisterRecvFunction_next_free_ID = 0; +int DMA_RecFifoRegisterRecvFunction( + DMA_RecFifoRecvFunction_t recv_func, + void *recv_func_parm, + int is_error_function, + int is_header_fifo + ) +{ + int next_free_ID = DMA_RecFifoRegisterRecvFunction_next_free_ID; + int i; + + /* Perform error checks */ + if ( ( recv_func == NULL ) || + ( ( is_error_function != 0 ) && + ( is_error_function != 1 ) ) || + ( ( is_header_fifo != 0 ) && + ( is_header_fifo != 1 ) ) || + ( ( is_header_fifo == 1 ) && ( is_error_function == 1 ) ) ) + { + return -EINVAL; + } + + /* + * Handle a "header" receive function. + */ + if ( is_header_fifo == 1 ) + { + if ( DMA_RecFifoInfo.headerRecvFunction != NULL ) /* Already registered?*/ + { + return -EBUSY; + } + DMA_RecFifoInfo.headerRecvFunction = recv_func; + DMA_RecFifoInfo.headerRecvFunctionParm = recv_func_parm; + return 0; /* Indicate success */ + } + + /* + * Handle a "error" receive function. + */ + if ( is_error_function == 1 ) + { + if ( DMA_RecFifoInfo.errorRecvFunction != + &DMA_RecFifoDefaultErrorRecvFunction ) /* Already registered? */ + { + return -EBUSY; + } + DMA_RecFifoInfo.errorRecvFunction = recv_func; + DMA_RecFifoInfo.errorRecvFunctionParm = recv_func_parm; + return 0; /* Indicate success */ + } + + /* + * Handle a "normal" receive function. + */ + + for (i=next_free_ID; i < 256; i++) /* Search for an empty slot */ + { + if ( DMA_RecFifoInfo.recvFunctions[i] == NULL ) /* Found a slot? 
*/ + { + DMA_RecFifoInfo.recvFunctions[i] = recv_func; + DMA_RecFifoInfo.recvFunctionsParms[i] = recv_func_parm; + next_free_ID = i; + return i; /* Return the registration ID */ + } + } + DMA_RecFifoRegisterRecvFunction_next_free_ID = next_free_ID; + + return -EBUSY; /* No open slots */ + +} + + +/*! + * \brief De-Register a Reception Fifo Receive Function + * + * De-register a previously registered receive function. + * + * \param[in] registrationId Registration Id returned from + * DMA_RecFifoRegisterRecvFunction (0..255). + * A negative value means that no + * registration id is specified. + * \param[in] is_error_function 1 means the error receive function is + * to be de-registered. + * 0 otherwise. + * \param[in] is_header_fifo 1 means the header fifo receive function is + * to be de-registered. + * 0 otherwise. + * + * \retval 0 Success + * negative Error value + * + * \see DMA_RecFifoRegisterRecvFunction + */ +int DMA_RecFifoDeRegisterRecvFunction( + int registrationId, + int is_error_function, + int is_header_fifo + ) +{ + /* Perform error checks */ + if ( ( registrationId > 255 ) || + ( ( is_error_function != 0 ) && + ( is_error_function != 1 ) ) || + ( ( is_header_fifo != 0 ) && + ( is_header_fifo != 1 ) ) ) + { + return -EINVAL; + } + + /* + * Handle a "header" receive function. + */ + if ( is_header_fifo == 1 ) + { + DMA_RecFifoInfo.headerRecvFunction = NULL; + DMA_RecFifoInfo.headerRecvFunctionParm = NULL; + } + + /* + * Handle a "error" receive function. + */ + if ( is_error_function == 1 ) + { + DMA_RecFifoInfo.errorRecvFunction = NULL; + DMA_RecFifoInfo.errorRecvFunctionParm = NULL; + } + + /* + * Handle a "normal" receive function. + */ + + if ( registrationId >= 0 ) + { + DMA_RecFifoInfo.recvFunctions[registrationId] = NULL; + DMA_RecFifoInfo.recvFunctionsParms[registrationId] = NULL; + DMA_RecFifoRegisterRecvFunction_next_free_ID = 0; /* Start at beginning next time */ + } + + return 0; + +} + + +/*! 
+ * \brief DMA Reception Fifo Default Error Receive Function + * + * This is the default function that will handle packets having an + * unregistered registration ID. + * + * \param[in] f_ptr Pointer to the reception fifo. + * \param[in] packet_ptr Pointer to the packet header (== va_head). + * This is quad-aligned for optimized copying. + * \param[in] recv_func_parm Pointer to storage specific to this receive + * function. This pointer was specified when the + * receive function was registered with the kernel, + * and is passed to the receive function + * unchanged. + * \param[in] payload_ptr Pointer to the beginning of the payload. + * This is quad-aligned for optimized copying. + * \param[in] payload_bytes Number of bytes in the payload + * + * \retval -1 An unregistered packet was just processed. This is considered + * an error. + */ +int DMA_RecFifoDefaultErrorRecvFunction( + DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) +{ + int i; + + printf ( "\nUnregistered Packet Received in Reception Fifo %d\n", + f_ptr->global_fifo_id); + + printf ( "Packet Header:\n"); + printf ( "%08x%08x%08x%08x\n",*((int*)&packet_ptr[0]), + *((int*)&packet_ptr[4]), + *((int*)&packet_ptr[8]), + *((int*)&packet_ptr[12])); + printf ( "Packet Payload:\n"); + + for (i=0; i<payload_bytes; i+=16); + { + printf ( "%08x%08x%08x%08x\n",*((int*)&payload_ptr[i]), + *((int*)&payload_ptr[i+4]), + *((int*)&payload_ptr[i+8]), + *((int*)&payload_ptr[i+12])); + } + + SPI_assert(0); + + return -1; +} + + +/*! + * \brief DMA Reception Fifo Get Addresses + * + * Analyze the packet at the head of the reception fifo and return a + * DMA_PacketIovec_t describing the payload of the packet. In particular, + * determine if the packet is contiguous in the fifo, or whether it wraps + * around to the start of the fifo. + * + * \param[in] f_ptr Pointer to the reception fifo structure. 
+ * \param[in,out] io_vec Pointer to the packet I/O vector structure to + * be filled in. + * + * \return The io_vec structure has been filled-in. + * + * \pre The caller has determined that the fifo has a packet in it (it + * is not empty). + * + * \note + * - For non-header packets, only non-DMA packets (memory fifo packets) + * are in the fifo and need to be handled. + */ +void DMA_RecFifoGetAddresses( + DMA_RecFifo_t *f_ptr, + DMA_PacketIovec_t *io_vec + ) +{ + DMA_PacketHeader_t *packet_ptr; + unsigned int payload_bytes; + unsigned int payload_bytes_to_end_of_fifo = 0; + + SPI_assert( f_ptr != NULL ); + SPI_assert( io_vec != NULL ); + + if ( f_ptr->global_fifo_id < DMA_NUM_NORMAL_REC_FIFOS ) /* Is this a */ + /* normal fifo?*/ + { /* Yes. Process a normal packet */ + packet_ptr = (DMA_PacketHeader_t*)f_ptr->dma_fifo.va_head; /* Point */ + /* to the packet. */ + + payload_bytes = ( (packet_ptr->Chunks + 1) << 5 ) - + sizeof(DMA_PacketHeader_t); /* Calculate payload bytes. */ + + io_vec->payload_ptr[0] = + (char*)packet_ptr + + sizeof(DMA_PacketHeader_t); /* Set first payload ptr */ + + /* Determine if the payload is contiguous in the fifo, and set up */ + /* the iovec accordingly. */ + if ( ( payload_bytes <= 16 ) || /* A 32-byte packet will always be */ + /* contiguous...this is an */ + /* optimization to avoid the next */ + /* set of calculations. */ + ( payload_bytes <= /* Calculate how much space to the */ + ( payload_bytes_to_end_of_fifo = /* end of the fifo. */ + ( (unsigned)f_ptr->dma_fifo.va_end - /* Check if entire */ + (unsigned)io_vec->payload_ptr[0] ) ) ) ) /* payload fits.*/ + { + /* Set up io_vec for contiguous payload */ + io_vec->num_segments = 1; /* Indicate contiguous payload. */ + io_vec->num_bytes[0] = payload_bytes; + io_vec->payload_ptr[1] = NULL; + io_vec->num_bytes[1] = 0; + return; + } + else + { /* Set up io_vec for non-contiguous payload. */ + + io_vec->num_segments = 2; /* Indicate split payload. 
*/ + io_vec->num_bytes[0] = payload_bytes_to_end_of_fifo; + io_vec->payload_ptr[1] = f_ptr->dma_fifo.va_start; + io_vec->num_bytes[1] = payload_bytes - + payload_bytes_to_end_of_fifo; + return; + } + } /* End: Non-header packet */ + + else /* Header packet. */ + + { /* Header packet */ + io_vec->num_segments = 0; /* Indicate header fifo. */ + io_vec->payload_ptr[0] = NULL; /* Everything else is NULL or zero. */ + io_vec->payload_ptr[1] = NULL; + io_vec->num_bytes[0] = 0; + io_vec->num_bytes[1] = 0; + return; + } + +} /* End: DMA_RecFifoGetAddresses() */ + + +/*! + * \brief Get Index of Next Reception Fifo in Group + * + * A reception fifo group contains up to DMA_NUM_REC_FIFOS_PER_GROUP. + * It contains an array of fifos. Up to fg_ptr->num_normal_fifos normal + * fifos are in the first array slots. Up to 1 header fifo is in the + * last array slot. + * + * This function returns the array index of the next normal fifo in the group + * that is being used, based upon the desired fifo_index and the not-empty + * status. + * + * If *not_empty_status is -1, the status is fetched from the DMA SRAM (first + * time condition). + * + * If the DMA SRAM not-empty status for this group is all zero (all fifos are + * empty), the status is checked num_empty_passes times with a slight delay + * in between to give the DMA time to make progress before returning a -1, + * indicating that there is nothing more to process. + * + * \param[in] fg_ptr Pointer to the fifo group + * \param[in] desired_fifo_index Index of the fifo that is desired to be + * processed. + * \param[in,out] fifo_bit Pointer to the bit in the not_empty_status + * that corresponds to the desired_fifo_index + * (on input) and the returned next_fifo_index + * (on output). 
+ * \param[in] num_empty_passes When the not-empty status indicates that all + * fifos in the group are emtpy, this is the + * number of times the not-empty status is + * re-fetched and re-checked before officially + * declaring that they are indeed empty + * (0 means no extra passes are made). + * \param[in] not_empty_poll_delay The number of pclks to delay between polls + * of the not-empty status when the fifos are + * empty. + * \param[in,out] not_empty_status Pointer to the location to shadow the + * not empty status. + * + * \retval next_fifo_index Index of the next fifo in the group to be + * processed. + * \retval -1 Indicates that the normal fifos in the group are + * all empty. + * + * \post The va_tail of the fifo that is returned has been refreshed from + * the DMA hardware. + * + */ +__INLINE__ int DMA_RecFifoGetNextFifo( + DMA_RecFifoGroup_t *fg_ptr, + int desired_fifo_index, + unsigned int *fifo_bit, + int num_empty_passes, + int not_empty_poll_delay, + unsigned int *not_empty_status + ) +{ + unsigned int status = *not_empty_status; /* Make a local copy */ + unsigned int status_bit = *fifo_bit; + int fifo_index = desired_fifo_index; + + /* + * If *not_empty_status is 0, either the status has not been fetched yet + * (first-time condition), or all fifos were emptied. Go fetch the + * not-empty status again. + */ + if ( status == 0 ) + { + status = DMA_RecFifoGetNotEmpty( fg_ptr, + 0 ); /* Get Normal fifo */ + /* not-empty status. */ + *not_empty_status = status; /* Return the status to the caller */ + +#ifdef DEBUG_PRINT + printf("New notEmptyStatus1=0x%08x\n",*not_empty_status); +#endif + } + + /* + * If the DMA SRAM not-empty status for this group is all zero (all fifos are + * empty), the status is checked num_empty_passes times with a slight delay + * in between to give the DMA time to make progress before returning a -1, + * indicating that there is nothing more to process. 
+ */ + while ( ( status == 0 ) && + ( num_empty_passes-- > 0 ) ) + { + /* Delay, allowing the DMA to update its status */ + unsigned int pclks = not_empty_poll_delay; + while( pclks-- ) + { + asm volatile("nop;"); + } + + /* Re-fetch the not-empty status */ + status = DMA_RecFifoGetNotEmpty( fg_ptr, + 0 ); /* Get Normal fifo */ + /* not-empty status. */ + *not_empty_status = status; /* Return the status to the caller */ + +#ifdef DEBUG_PRINT + printf("New notEmptyStatus2=0x%08x\n",*not_empty_status); +#endif + } + + if ( status == 0 ) return (-1); /* Can't find any not empty */ + + /* + * We have some fifos that are not empty. + * Determine the fifo_index to be returned. + * Loop until we hit a non-empty fifo. + */ +#ifdef DEBUG_PRINT + printf("Checking status1 = 0x%08x for fifo_index %d, bit 0x%08x\n", status, fifo_index, status_bit); +#endif + + while ( ( status & status_bit ) == 0 ) + { + fifo_index++; /* Try next fifo. */ + if ( fifo_index >= fg_ptr->num_normal_fifos ) /* Wrap? */ + fifo_index = 0; /* Start over with zero. */ + + status_bit = _BN(fg_ptr->fifos[fifo_index].global_fifo_id); /* Map to */ + /* proper not-empty bit. */ + +#ifdef DEBUG_PRINT + printf("Checking status2 = 0x%08x for fifo_index %d, bit 0x%08x\n", status, fifo_index, status_bit); +#endif + } + + /* Refresh the tail because the DMA may have moved it */ + DMA_RecFifoGetTailById( fg_ptr, + fifo_index ); + + *fifo_bit = status_bit; /* Return the fifo index and its bit */ + +#ifdef DEBUG_PRINT + printf("Returning fifo_index=%d, status bit 0x%08x\n",fifo_index,status_bit); +#endif + + return (fifo_index); + +} /* End: DMA_RecFifoGetNextFifo() */ + + +/*! + * \brief Poll Normal Reception Fifos + * + * Poll the "normal" reception fifos in the specified fifo group, removing one + * packet after another from the fifos, dispatching the appropriate receive + * function for each packet, until one of the following occurs: + * 1. Total_packets packets are received + * 2. All the fifos are empty + * 3. 
A receive function returns a non-zero value + * 4. The last packet removed from a fifo has an invalid registration id. The + * error receive function will have been called, but polling ends. + * The invalid packet is counted as a processed packet, and the return + * code from the error receive function is returned. + * + * Polling occurs in a round-robin fashion through the array of normal fifos in + * the group, beginning with array index starting_index. If a fifo has a packet, + * the appropriate receive function is called. Upon return, the packet is + * removed from the fifo (the fifo head is moved past the packet). + * + * After processing packets_per_fifo packets in a fifo (or emptying that fifo), + * the next fifo in the group is processed. When the last index in the fifo + * array is processed, processing continues with the first fifo in the array. + * Multiple loops through the array of fifos in the group may occur. + * + * The receive functions must be registered through the + * DMA_RecFifoRegisterRecvFunction interface. The receive function is + * called with a pointer to the packet header, pointer to the payload, and + * length of the payload. The packet header is always be 16 bytes of + * contiguous storage, in the fifo. Because the fifo is a circular buffer, + * the payload of a packet may wrap from the end of the fifo to the beginning. + * For large fifos, this happens infrequently. To make it easier for + * user/messaging code, the poll function will always return a starting payload + * address and number of bytes so that the receive function can treat the packet + * as contiguous storage in memory. If the packet does not wrap, the starting + * payload address will be a pointer to the appropriate address in the fifo. + * If the packet does wrap, the poll function will copy bytes from the fifo to + * a contiguous buffer (on the stack) and call the receive function with a + * payload pointer pointing to this temporary buffer. 
In either case, when the + * receive function returns, user code cannot assume that the payload buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function would + * have to copy it to some other location. The packet header and payload are + * 16-byte aligned for optimized copying. + * + * \param[in] total_packets The maximum number of packets that will be + * processed. + * \param[in] packets_per_fifo The maximum number of packets that will be + * processed in a given fifo before switching + * to the next fifo. + * \param[in] starting_index The fifos in the fifo group are maintained + * in an array. This is the array index of the + * first fifo to be processed (0 through + * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1). + * \param[in] num_empty_passes The number of passes over the normal fifos + * while they are empty that this function + * should tolerate before giving up and + * returning. This is an optimization + * to catch late arriving packets. + * (0 means no extra passes are made). + * \param[in] not_empty_poll_delay The number of pclks to delay between polls + * of the not-empty status when the fifos are + * empty. + * \param[in] fg_ptr Pointer to the fifo group. + * \param[out] next_fifo_index Pointer to an int where the recommended + * starting_index for the next call is returned. + * + * \retval num_packets_received The number of packets received and processed. + * next_fifo_index is set. + * \retval negative_value The return code from the receive function that + * caused polling to end. next_fifo_index is + * set. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * + * \note next_fifo_index is set to the index of the fifo that had the last + * packet received if all packets_per_fifo packets were not received from + * that fifo. 
However, if all packets_per_fifo packets were received + * from that fifo, the index of the next fifo will be returned. + * + */ +int DMA_RecFifoPollNormalFifos(int total_packets, + int packets_per_fifo, + int starting_index, + int num_empty_passes, + int not_empty_poll_delay, + DMA_RecFifoGroup_t *fg_ptr, + int *next_fifo_index + ) +{ + int fifo_index; /* Index of fifo being processed */ + unsigned int fifo_bit_number; /* The bit number of the fifo */ + /* being processed. Group0: 0-7, */ + /* Group1: 8-15, Group2: 16-23, */ + /* Group3: 24-31. Corresponds to */ + /* the DMA not-empty status bits. */ + int num_fifos_in_group; /* Number of fifos in this group. */ + int num_packets_in_fifo; /* Count of packets processed in a */ + /* fifo. */ + unsigned int not_empty_status=0; /* Snapshot of the not empty status*/ + /* for this group. 0 indicates */ + /* that no snapshot has occurred */ + /* yet. */ + int rc = 0; /* Return code from recv_func. */ + int num_processed = 0; /* Number of packets processed */ + DMA_PacketIovec_t io_vec; /* Payload I/O vector */ + DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function */ + void *recv_func_parm;/* Receive function parameter */ + int recv_func_id; /* Function ID from the packet */ + /* header. */ + void *recv_func_payload;/* Pointer to recv func payload */ + void *recv_func_packet; /* Pointer to recv func packet */ + DMA_RecFifo_t *fifo_ptr; /* Pointer to fifo being processed */ + char temp_packet[256] ALIGN_QUADWORD; /* Temporary packet copy. */ + /* Align for efficient copying. 
*/ + char *load_ptr, *store_ptr; /* Used for copying bytes */ + int num_quads; /* Number of quads to copy */ + DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */ + + SPI_assert( total_packets > 0 ); + SPI_assert( packets_per_fifo > 0 ); + SPI_assert( packets_per_fifo <= total_packets ); + SPI_assert( num_empty_passes >= 0 ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( next_fifo_index != NULL ); + SPI_assert( ( starting_index >= 0 ) && + ( starting_index < fg_ptr->num_normal_fifos ) ); + + num_fifos_in_group = fg_ptr->num_normal_fifos; + *next_fifo_index = starting_index; /* Tell caller to start with the same */ + /* fifo next time. */ + fifo_index = starting_index; /* Start with the fifo the caller says to*/ + +#ifdef DEBUG_PRINT + int i; + for (i=0; i<fg_ptr->num_normal_fifos; i++) + printf("FifoIndex=%d <--> GlobalID=%d\n",i,fg_ptr->fifos[i].global_fifo_id); +#endif + + /* + * Circularly loop through the not-empty fifos in the fifo group. + * Keep going until one of the termination conditions documented in the + * prolog occurs. + * + */ + for (;;) + { + /* + * Find the next fifo to process. + */ + fifo_ptr = &fg_ptr->fifos[fifo_index]; /* This is the fifo itself*/ + fifo_bit_number = _BN(fifo_ptr->global_fifo_id);/* The fifo's status bit*/ + + fifo_index = DMA_RecFifoGetNextFifo(fg_ptr, + fifo_index, + &fifo_bit_number, + num_empty_passes, + not_empty_poll_delay, + ¬_empty_status); + if (fifo_index < 0) { /* No more packets to process? */ + +#if defined(BGP_DD1_WORKAROUNDS) + /* + * If there are no more non-empty fifos, count the number of consecutive + * times the poll function came up dry (num_processed == 0), and if it + * exceeds a threshold, issue a system call to clear the rDMA's "full + * reception fifo" condition so it begins to receive packets again. + * + * When a non-empty fifo is returned, its shadow va_tail pointer has been + * updated to reflect the amount of packet data in the fifo. 
+ */ + if (num_processed > 0) { /* Did we process at least 1 packet? */ + NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */ + /* likely not encountered a fifo full */ + /* condition and stopped. Reset the */ + /* fifo counter so we will start */ + /* tracking empty calls to poll. */ + } + else { + if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */ + (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) { + /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */ + rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */ + /* reception fifos filled and the */ + /* DMA has stopped. */ + /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */ + NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */ + /* fill-fifo counter. */ + } + } +#endif + /* printf("Poll: returned %d processed\n",num_processed); */ + return (num_processed); + } + + *next_fifo_index = fifo_index; /* Tell caller to start with this fifo */ + /* next time. */ + fifo_ptr = &(fg_ptr->fifos[fifo_index]); + num_packets_in_fifo = 0; + + /* + * MSYNC before we look at the data in the fifo to ensure that snoops + * issued by the DMA have completed. This ensures the L1 cache + * invalidations have completed so we don't look at stale data. + */ + _bgp_msync(); + + /* + * Within a fifo: The area between the va_head and va_tail shadow pointers + * contains packets to be processed. Loop, processing those packets until + * we have processed packets_per_fifo of them, or all of them, or other + * issues come up. + * + */ +#if defined(CONFIG_BGP_STATISTICS) + { + unsigned int used_space = (fifo_ptr->dma_fifo.va_tail >= fifo_ptr->dma_fifo.va_head) + ? 
( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) + : (fifo_ptr->dma_fifo.fifo_size + ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) ) + ; + reception_fifo_histogram[fls(used_space)] += 1 ; + } +#endif + while ( ( num_packets_in_fifo < packets_per_fifo ) && + ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) ) + { + DMA_RecFifoGetAddresses( fifo_ptr, + &io_vec ); /* Get the payload pointer(s) */ + /* for the packet at the head */ + /* of the fifo. */ + + packet_ptr = (DMA_PacketHeader_t*) + fifo_ptr->dma_fifo.va_head; /* Point to packet header*/ + +#ifdef DEBUG_PRINT + printf("ReceivedPacketHead = 0x%08x\n",(unsigned)packet_ptr); + printf("ReceivedPacketIovec= 0x%08x %d, 0x%08x %d\n", + (unsigned)io_vec.payload_ptr[0], io_vec.num_bytes[0], + (unsigned)io_vec.payload_ptr[1], io_vec.num_bytes[1]); +#endif + /* + * Determine the receive function to call. Index into + * recvFunctions array is in the packet header. + */ + recv_func_id = packet_ptr->Func_Id; + recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id]; + if ( recv_func_ptr != NULL ) + { + recv_func_parm = + DMA_RecFifoInfo.recvFunctionsParms[recv_func_id]; + } + else + { + recv_func_ptr = DMA_RecFifoInfo.errorRecvFunction; + recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm; + } + /* + * Use a temporary copy of the packet, when the payload + * wraps. 
+ */ + if ( io_vec.num_segments > 1 ) + { +#ifdef DEBUG_PRINT + printf("Payload Wraps: Packet Header: 0x%08x, Iovecs: 0x%08x %d, 0x%08x %d\n", + (unsigned)packet_ptr, + (unsigned)io_vec.payload_ptr[0], io_vec.num_bytes[0], + (unsigned)io_vec.payload_ptr[1], io_vec.num_bytes[1]); +#endif + + /* Copy packet header and first payload segment */ + load_ptr = (char*)packet_ptr; + store_ptr = temp_packet; + num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4; + while ( num_quads > 0 ) + { +#ifdef DEBUG_PRINT + printf("load_ptr =0x%08x, load_value =0x%08x%08x%08x%08x\n", + (unsigned)load_ptr, *(unsigned*)load_ptr, *(unsigned*)(load_ptr+4), + *(unsigned*)(load_ptr+8), *(unsigned*)(load_ptr+12)); +#endif + _bgp_QuadLoad ( load_ptr, 0 ); + + _bgp_QuadStore( store_ptr, 0 ); +#ifdef DEBUG_PRINT + printf("store_ptr=0x%08x, store_value=0x%08x%08x%08x%08x\n", + (unsigned)store_ptr, *(unsigned*)store_ptr, *(unsigned*)(store_ptr+4), + *(unsigned*)(store_ptr+8), *(unsigned*)(store_ptr+12)); +#endif + + load_ptr += 16; + store_ptr += 16; + num_quads--; + } + /* Copy second payload segment */ + load_ptr = (char*)io_vec.payload_ptr[1]; + num_quads = io_vec.num_bytes[1] >> 4; + while ( num_quads > 0 ) + { +#ifdef DEBUG_PRINT + printf("load_ptr =0x%08x, load_value =0x%08x%08x%08x%08x\n", + (unsigned)load_ptr, *(unsigned*)load_ptr, *(unsigned*)(load_ptr+4), + *(unsigned*)(load_ptr+8), *(unsigned*)(load_ptr+12)); +#endif + _bgp_QuadLoad ( load_ptr, 0 ); + + _bgp_QuadStore( store_ptr, 0 ); +#ifdef DEBUG_PRINT + printf("store_ptr=0x%08x, store_value=0x%08x%08x%08x%08x\n", + (unsigned)store_ptr, *(unsigned*)store_ptr, *(unsigned*)(store_ptr+4), + *(unsigned*)(store_ptr+8), *(unsigned*)(store_ptr+12)); +#endif + load_ptr += 16; + store_ptr += 16; + num_quads--; + } + recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t); + recv_func_packet = temp_packet; + + } /* End: Set up temporary copy of split packet */ + + else /* Set up for contiguous packet */ + { + 
recv_func_payload = (char*)packet_ptr + + sizeof(DMA_PacketHeader_t); + recv_func_packet = packet_ptr; + } + + /* Call the receive function */ + if( recv_func_ptr ) + { + rc = (*recv_func_ptr)(fifo_ptr, + recv_func_packet, + recv_func_parm, + recv_func_payload, + io_vec.num_bytes[0]+io_vec.num_bytes[1]); + } + else + { + printk(KERN_ERR "DMA_RecFifoPollNormalFifos recv_func_ptr was NULL recv_func_id=%02x fifo_ptr=%p recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n", + recv_func_id,fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ; + } + + /* Increment the head by the size of the packet */ + DMA_RecFifoIncrementHead(fifo_ptr, + (io_vec.num_bytes[0]+ + io_vec.num_bytes[1] + + sizeof(DMA_PacketHeader_t))>> 4); + + num_processed++; + + if ( rc != 0 ) /* Did receive function fail? */ + { +#if defined(BGP_DD1_WORKAROUNDS) + NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */ + /* likely not encountered a fifo full */ + /* condition and stopped. Reset the */ + /* fifo counter so we will start */ + /* tracking empty calls to poll. */ +#endif + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. + */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + fifo_bit_number, + 0 ); + return (rc); /* Yes...return that return code */ + } + + if ( num_processed >= total_packets ) /* Got what they wanted? */ + { +#if defined(BGP_DD1_WORKAROUNDS) + NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */ + /* likely not encountered a fifo full */ + /* condition and stopped. Reset the */ + /* fifo counter so we will start */ + /* tracking empty calls to poll. */ +#endif + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. 
+ */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + fifo_bit_number, + 0 ); + return (num_processed); /* Yes...all done */ + } + + num_packets_in_fifo++; + + } /* End: Process up to packets_per_fifo packets in this fifo */ + + /* + * We exited the loop processing the fifo_index fifo. + * - If we exited because we reached the packets_per_fifo limit, we want + * to turn off this fifo's not-empty status in our shadow copy of the + * status so we process all of the other fifos before re-fetching the + * true status, giving this fifo another chance. + * - If we exited because the fifo was empty according to our snapshot + * of the fifo's tail (head == tail snapshot), we want to turn off this + * fifo's not-empty status in our shadow copy of the status so we + * process all of the other fifos before re-fetching the true status and + * tail for this fifo, giving this fifo another chance. + * Either way, we turn off the status bit. + * + */ + not_empty_status &= ~(fifo_bit_number); + + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. + */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + fifo_bit_number, + 0 ); + +#ifdef DEBUG_PRINT + printf("PollNormal: Turning off status bit 0x%08x, status=0x%08x\n",fifo_bit_number,not_empty_status); +#endif + + /* Bump to next fifo */ + fifo_index = (fifo_index+1) % num_fifos_in_group; + + /* + * If we have processed the max number of packets from the previous fifo, + * the recommended next fifo to process is the one after that. + * + */ + if ( num_packets_in_fifo == packets_per_fifo ) + { + *next_fifo_index = fifo_index; + } + + } /* End: Keep looping through the fifos. 
*/ + +} /* End: DMA_RecFifoPollNormalFifos() */ + + + + + +static int dumpmem_count ; + +static inline void quadcpy(void *dest, const void *src) +{ + unsigned int *desti=(unsigned int *) dest ; + const unsigned int *srci=(const unsigned int *) src ; + unsigned int w0 = srci[0] ; + unsigned int w1 = srci[1] ; + unsigned int w2 = srci[2] ; + unsigned int w3 = srci[3] ; + desti[0] = w0 ; + desti[1] = w1 ; + desti[2] = w2 ; + desti[3] = w3 ; +} +/*! + * \brief Poll Normal Reception Fifo Given a Fifo Group and Fifo ID + * + * Poll the specified "normal" reception fifo in the specified fifo group, + * removing one packet after another from the fifo, dispatching the appropriate + * receive function for each packet, until one of the following occurs: + * 1. num_packets packets are received + * 2. The specified fifo is empty + * 3. A receive function returns a non-zero value + * 4. The last packet removed from the fifo has an invalid registration id. The + * error receive function will have been called, but polling ends. + * The invalid packet is counted as a processed packet, and the return + * code from the error receive function is returned. + * + * If the specified fifo has a packet, the appropriate receive function is + * called. Upon return, the packet is removed from the fifo (the fifo head is + * moved past the packet). + * + * After processing num_packets packets in the fifo (or emptying that fifo), + * the function returns the number of packets processed * + * The receive functions must be registered through the + * DMA_RecFifoRegisterRecvFunction interface. The receive function is + * called with a pointer to the packet header, pointer to the payload, and + * length of the payload. The packet header is always be 16 bytes of + * contiguous storage, in the fifo. Because the fifo is a circular buffer, + * the payload of a packet may wrap from the end of the fifo to the beginning. + * For large fifos, this happens infrequently. 
To make it easier for + * user/messaging code, the poll function will always return a starting payload + * address and number of bytes so that the receive function can treat the packet + * as contiguous storage in memory. If the packet does not wrap, the starting + * payload address will be a pointer to the appropriate address in the fifo. + * If the packet does wrap, the poll function will copy bytes from the fifo to + * a contiguous buffer (on the stack) and call the receive function with a + * payload pointer pointing to this temporary buffer. In either case, when the + * receive function returns, user code cannot assume that the payload buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function would + * have to copy it to some other location. The packet header and payload are + * 16-byte aligned for optimized copying. + * + * \param[in] num_packets The maximum number of packets that will be + * processed. + * \param[in] fifo_id The ID of the fifo to be polled. + * (0 through + * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1). + * \param[in] num_empty_passes When the not-empty status indicates that all + * fifos in the group are emtpy, this is the + * number of times the not-empty status is + * re-fetched and re-checked before officially + * declaring that they are indeed empty. + * (0 means no extra passes are made). + * \param[in] not_empty_poll_delay The number of pclks to delay between polls + * of the not-empty status when the fifos are + * empty. + * \param[in] fg_ptr Pointer to the fifo group. + * + * \param[in] empty_callback Function to call when spinning because the FIFO looks empty. + * + * \retval num_packets_received The number of packets received and processed. + * \retval negative_value The return code from the receive function that + * caused polling to end. 
+ * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * + */ +int DMA_RecFifoPollNormalFifoById( int num_packets, + int fifo_id, + int num_empty_passes, + int not_empty_poll_delay, + DMA_RecFifoGroup_t *fg_ptr, + void (*empty_callback)(void) + ) +{ + int num_packets_in_fifo; /* Count of packets processed in a */ + /* fifo. */ + unsigned int status; /* Snapshot of the not empty status*/ + /* for this group. */ + int rc = 0; /* Return code from recv_func. */ + int num_processed = 0; /* Number of packets processed */ + DMA_PacketIovec_t io_vec; /* Payload I/O vector */ + DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function */ + void *recv_func_parm;/* Receive function parameter */ + int recv_func_id; /* Function ID from the packet */ + /* header. */ + void *recv_func_payload;/* Pointer to recv func payload */ + void *recv_func_packet; /* Pointer to recv func packet */ + DMA_RecFifo_t *fifo_ptr; /* Pointer to fifo being processed */ + char temp_packet[256] ALIGN_QUADWORD; /* Temporary packet copy. */ + /* Align for efficient copying. */ + char *load_ptr, *store_ptr; /* Used for copying bytes */ + int num_quads; /* Number of quads to copy */ + DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */ + int passes; /* Counter of not-empty passes */ + + SPI_assert( num_packets > 0 ); + SPI_assert( num_empty_passes >= 0 ); + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fifo_id >= 0 ) && + ( fifo_id < DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP ) ); + + fifo_ptr = &(fg_ptr->fifos[fifo_id]); + + /* + * Loop until the specified fifo is declared empty, or + * until one of the termination conditions documented in the prolog occurs. + * + */ + for (;;) + { + /* + * If the DMA SRAM not-empty status for this fifo is zero (the fifo is + * empty), the status is checked num_empty_passes times with a slight + * delay in between to give the DMA time to make progress before declaring + * that the fifo is truely empty. 
+ */ + passes = num_empty_passes; + status = DMA_RecFifoGetNotEmptyById( fg_ptr, + fifo_id ); /* Get Normal fifo */ + /* not-empty status. */ + while ( ( status == 0 ) && + ( num_empty_passes-- > 0 ) ) + { + /* Delay, allowing the DMA to update its status */ + unsigned int pclks = not_empty_poll_delay; + (*empty_callback)() ; + while( pclks-- ) + { + asm volatile("nop;"); + } + + /* Re-fetch the not-empty status */ + status = DMA_RecFifoGetNotEmptyById( + fg_ptr, + fifo_id ); /* Get Normal fifo */ + /* not-empty status.*/ + } + + if ( status == 0 ) { /* Fifo is empty? */ + +#if defined(BGP_DD1_WORKAROUNDS) + if (num_processed > 0) { /* Did we process at least 1 packet? */ + NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */ + /* likely not encountered a fifo full */ + /* condition and stopped. Reset the */ + /* fifo counter so we will start */ + /* tracking empty calls to poll. */ + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. + */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + _BN(fifo_ptr->global_fifo_id), + 0 ); + } + else { + if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */ + (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) { + /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */ + rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */ + /* reception fifos filled and the */ + /* DMA has stopped. */ + /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */ + NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */ + /* fill-fifo counter. */ + } + } +#endif + + return (num_processed); + } + + /* The fifo has something in it. + * Update its shadow va_tail pointer to reflect the amount of packet + * data in the fifo. 
+ */ + DMA_RecFifoGetTailById( fg_ptr, + fifo_id ); + + num_packets_in_fifo = 0; + + /* + * MSYNC before we look at the data in the fifo to ensure that snoops + * issued by the DMA have completed. This ensures the L1 cache + * invalidations have completed so we don't look at stale data. + */ + _bgp_msync(); + + /* + * Within a fifo: The area between the va_head and va_tail shadow pointers + * contains packets to be processed. Loop, processing those packets until + * we have processed packets_per_fifo of them, or all of them, or other + * issues come up. + * + */ +#if defined(CONFIG_BGP_STATISTICS) + { + unsigned int tail = (unsigned int) fifo_ptr->dma_fifo.va_tail ; + unsigned int head = (unsigned int) fifo_ptr->dma_fifo.va_head ; + unsigned int end = (unsigned int) fifo_ptr->dma_fifo.va_end ; + unsigned int start = (unsigned int) fifo_ptr->dma_fifo.va_start ; + unsigned int used_space = ( tail >= head ) ? (tail-head) : ((tail-start)+(end-head)) ; + reception_fifo_histogram[fls(used_space >> 4)] += 1 ; + if( used_space > reception_hi_watermark ) reception_hi_watermark = used_space ; + +/* unsigned int used_space = (fifo_ptr->dma_fifo.va_tail >= fifo_ptr->dma_fifo.va_head) */ +/* ? ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) */ +/* : (fifo_ptr->dma_fifo.fifo_size + ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) ) */ +/* ; */ +/* reception_fifo_histogram[fls(used_space)] += 1 ; */ + } +#endif + while ( ( num_packets_in_fifo < num_packets ) && + ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) ) + { + DMA_RecFifoGetAddresses( fifo_ptr, + &io_vec ); /* Get the payload pointer(s) */ + /* for the packet at the head */ + /* of the fifo. */ + + packet_ptr = (DMA_PacketHeader_t*) + fifo_ptr->dma_fifo.va_head; /* Point to packet header*/ + /* + * Determine the receive function to call. Index into + * recvFunctions array is in the packet header. 
+ */ + recv_func_id = packet_ptr->Func_Id; + recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id]; + if ( recv_func_ptr != NULL ) + { + recv_func_parm = + DMA_RecFifoInfo.recvFunctionsParms[recv_func_id]; + } + else + { + recv_func_ptr = DMA_RecFifoInfo.errorRecvFunction; + recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm; + } + /* + * Use a temporary copy of the packet, when the payload + * wraps. + */ + if ( io_vec.num_segments > 1 ) + { + /* Copy packet header and first payload segment */ + load_ptr = (char*)packet_ptr; + store_ptr = temp_packet; + num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4; + while ( num_quads > 0 ) + { + /* Don't bother doing this via doublehummer; it only happens 'occasionally' and means the caller has to enable for floating-point */ + quadcpy(store_ptr,load_ptr) ; +/* _bgp_QuadLoad ( load_ptr, 0 ); */ +/* _bgp_QuadStore( store_ptr, 0 ); */ + load_ptr += 16; + store_ptr += 16; + num_quads--; + } + /* Copy second payload segment */ + load_ptr = (char*)io_vec.payload_ptr[1]; + num_quads = io_vec.num_bytes[1] >> 4; + while ( num_quads > 0 ) + { + quadcpy(store_ptr,load_ptr) ; +/* _bgp_QuadLoad ( load_ptr, 0 ); */ +/* _bgp_QuadStore( store_ptr, 0 ); */ + load_ptr += 16; + store_ptr += 16; + num_quads --; + } + recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t); + recv_func_packet = temp_packet; + + } /* End: Set up temporary copy of split packet */ + + else /* Set up for contiguous packet */ + { + recv_func_payload = (char*)packet_ptr + + sizeof(DMA_PacketHeader_t); + recv_func_packet = packet_ptr; + } + + /* Call the receive function */ + if( recv_func_ptr ) + { +/* dumpmem(recv_func_packet-32, 128, "Software FIFO around call") ; */ + rc = (*recv_func_ptr)(fifo_ptr, + recv_func_packet, + recv_func_parm, + recv_func_payload, + io_vec.num_bytes[0]+io_vec.num_bytes[1]); + } + else + { + printk(KERN_ERR "DMA_RecFifoPollNormalFifoById recv_func_ptr was NULL recv_func_id=%02x fifo_ptr=%p 
recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n", + recv_func_id,fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ; + if( dumpmem_count < 10 ) + { + dumpmem(recv_func_packet-256, 512, "Software FIFO around misread") ; + dumpmem_count += 1 ; + } +/* show_tlbs((unsigned int) recv_func_packet) ; */ +/* (void)dma_map_single(NULL,recv_func_packet-32, 128,DMA_FROM_DEVICE) ; */ +/* dumpmem(recv_func_packet-32, 128, "Software FIFO around misread after cache discard") ; */ + } + + /* Increment the head by the size of the packet */ + DMA_RecFifoIncrementHead(fifo_ptr, + (io_vec.num_bytes[0]+ + io_vec.num_bytes[1] + + sizeof(DMA_PacketHeader_t))>> 4); + + num_processed++; + + if ( rc != 0 ) /* Did receive function fail? */ + { + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. + */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + _BN(fifo_ptr->global_fifo_id), + 0 ); + return (rc); /* Yes...return that return code */ + } + + if ( num_processed >= num_packets ) /* Got what they wanted? */ + { + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. + */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + _BN(fifo_ptr->global_fifo_id), + 0 ); + return (num_processed); /* Yes...all done */ + } + + num_packets_in_fifo++; + + } /* End: Process up to packets_per_fifo packets in this fifo */ + + } /* End: Keep looping through the fifo. */ + +} /* End: DMA_RecFifoPollNormalFifoById() */ + + + + +/*! + * + * \brief Prime Receive Function Cache for Polling Function + * + * The reception fifo receive function maintains a simple cache of information + * about the last receive function called. This function is called to return + * that information for a given function ID. + * + * \param [in] recv_func_id The function ID whose receive function info + * is to be returned. 
* \param [out]  recv_func_ptr  Pointer to the receive function's address,
+ *                              returned by this function.
+ * \param [out]  recv_func_parm Pointer to the receive function's parameter.
+ *
+ * \return  The information (function pointer and function parameter) for the
+ *          specified receive function is returned as described.
+ */
+/* NOTE(review): bare 'inline' (no 'static') -- linkage semantics differ
+ * between gnu89 and C99 inline; confirm the build uses gnu89 semantics. */
+inline
+void DMA_RecFifoPollPrimeRecvFuncCache( int                        recv_func_id,
+					DMA_RecFifoRecvFunction_t *recv_func_ptr,
+					void                     **recv_func_parm )
+{
+  DMA_RecFifoRecvFunction_t  local_recv_func_ptr;
+  void                      *local_recv_func_parm;
+
+  /* Registered function for this ID, or the error handler when the ID has
+   * no registration (NULL slot). */
+  local_recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id];
+  if ( local_recv_func_ptr != NULL ) {
+    local_recv_func_parm =
+                     DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
+  }
+  else {
+    local_recv_func_ptr  = DMA_RecFifoInfo.errorRecvFunction;
+    local_recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+  }
+  *recv_func_ptr = local_recv_func_ptr;
+  *recv_func_parm= local_recv_func_parm;
+
+} /* End: DMA_RecFifoPollPrimeRecvFuncCache() */
+
+
+
+
+/*!
+ *
+ * \brief Process a Wrap of a Reception Fifo While Polling
+ *
+ * This function is meant to be called by a polling function that has processed
+ * packets in a reception fifo such that there are just a few left to be
+ * processed before it hits the end of the fifo and wraps.  This function
+ * processes those packets at the end of the fifo until the wrap occurs,
+ * and then returns, leaving the rest of the packets in the fifo to be
+ * processed by the calling function.
+ *
+ * \param[in]  rec_fifo_ptr  Pointer to reception fifo
+ * \param[in,out]  va_head   Pointer to the fifo's virtual address
+ *                           head.  Updated by this function.
+ * \param[in,out]  va_tail   Pointer to the fifo's virtual address
+ *                           tail.  Updated by this function.
+ * \param[in,out]  num_processed  Pointer to the number of packets
+ *                           processed by the calling poll
+ *                           function.  Updated by this function.
+ * \param[in,out] num_processed_in_fifo Pointer to the number of packets + * in this particular fifo processed + * by the calling poll function. + * Updated by this function. + * \param[in] max_num_packets The max number of packets that can be + * processed before the poll function + * must return. + * \param[in] max_num_packets_in_fifo The max number of packets that can be + * processed in this fifo. + * + * \retval 0 Processing complete successfully. Output + * parameters have been updated as described. + * \retval negative_value The return code from the receive function that + * caused polling to end. + */ + +int DMA_RecFifoPollProcessWrap ( DMA_RecFifo_t *rec_fifo_ptr, + void **va_head, + void *va_tail, + int *num_processed, + int *num_processed_in_fifo, + int max_num_packets, + int max_num_packets_in_fifo) { + int rc = 0; + DMA_PacketIovec_t io_vec; /* Payload I/O vector */ + DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */ + DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function */ + void *recv_func_parm;/* Receive function parameter */ + int recv_func_id; /* Function ID from the packet */ + /* header. */ + void *recv_func_payload;/* Pointer to recv func payload */ + void *recv_func_packet; /* Pointer to recv func packet */ + char temp_packet[256] ALIGN_QUADWORD; /* Temporary packet copy. */ + /* Align for efficient copying. */ + char *load_ptr, *store_ptr; /* Used for copying bytes */ + int num_quads; /* Number of quads to copy */ + + while ( rc == 0 ) { /* Loop while things are good until we exit after */ + /* processing the wrap. */ + + DMA_RecFifoGetAddresses( rec_fifo_ptr, + &io_vec ); /* Get the payload pointer(s) */ + /* for the packet at the head */ + /* of the fifo. */ + + packet_ptr = (DMA_PacketHeader_t*) + rec_fifo_ptr->dma_fifo.va_head; /* Point to packet header */ + + /* + * Determine the receive function to call. Index into + * recvFunctions array is in the packet header. 
+ */ + recv_func_id = packet_ptr->Func_Id; + recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id]; + if ( recv_func_ptr != NULL ) + { + recv_func_parm = + DMA_RecFifoInfo.recvFunctionsParms[recv_func_id]; + } + else + { + recv_func_ptr = DMA_RecFifoInfo.errorRecvFunction; + recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm; + } + /* + * Use a temporary copy of the packet, when the payload + * wraps. + */ + if ( io_vec.num_segments > 1 ) + { + /* Copy packet header and first payload segment */ + load_ptr = (char*)packet_ptr; + store_ptr = temp_packet; + num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4; + while ( num_quads > 0 ) + { + _bgp_QuadLoad ( load_ptr, 0 ); + _bgp_QuadStore( store_ptr, 0 ); + load_ptr += 16; + store_ptr += 16; + num_quads --; + } + /* Copy second payload segment */ + load_ptr = (char*)io_vec.payload_ptr[1]; + num_quads = io_vec.num_bytes[1] >> 4; + while ( num_quads > 0 ) + { + _bgp_QuadLoad ( load_ptr, 0 ); + _bgp_QuadStore( store_ptr, 0 ); + load_ptr += 16; + store_ptr += 16; + num_quads --; + } + recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t); + recv_func_packet = temp_packet; + + } /* End: Set up temporary copy of split packet */ + + else /* Set up for contiguous packet */ + { + recv_func_payload = (char*)packet_ptr + + sizeof(DMA_PacketHeader_t); + recv_func_packet = packet_ptr; + } + + /* Call the receive function */ + if( recv_func_ptr) + { + rc = (*recv_func_ptr)(rec_fifo_ptr, + recv_func_packet, + recv_func_parm, + recv_func_payload, + io_vec.num_bytes[0]+io_vec.num_bytes[1]); + } + else + { + printk(KERN_ERR "DMA_RecFifoPollProcessWrap recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n", + recv_func_id,rec_fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ; + + } + + /* Increment the head by the size of the packet */ + DMA_RecFifoIncrementHead(rec_fifo_ptr, + 
(io_vec.num_bytes[0]+ + io_vec.num_bytes[1] + + sizeof(DMA_PacketHeader_t))>> 4); + *va_head = rec_fifo_ptr->dma_fifo.va_head; /* Refresh caller's head */ + + (*num_processed)++; + (*num_processed_in_fifo)++; + +#ifdef DEBUG_PRINT + printf("PollWrap: num_processed=%d, va_head=0x%08x, Part1Len=%d, Part2Len=%d, Part1Ptr=0x%08x, Part2Ptr=0x%08x\n",*num_processed,(unsigned)*va_head,io_vec.num_bytes[0],io_vec.num_bytes[1],(unsigned)io_vec.payload_ptr[0],(unsigned)io_vec.payload_ptr[1]); +#endif + + if ( ( (unsigned)*va_head < (unsigned)packet_ptr ) || /* Did we wrap? */ + ( *num_processed >= max_num_packets ) || /* Got enough packets? */ + ( *num_processed_in_fifo > max_num_packets_in_fifo ) ) /* Got enough */ + /* packets for this fifo? */ + { + break; + } + + } /* End: Keep looping through the fifo. */ + + return(rc); + +} /* End: DMA_RecFifoPollProcessWrap() */ + + +/*! + * \brief Simple Poll Normal Reception Fifos + * + * Poll the "normal" reception fifos in the specified fifo group, removing one + * packet after another from the fifos, dispatching the appropriate receive + * function for each packet, until one of the following occurs: + * 1. All packets in all of the fifos have been received. + * 2. A receive function returns a non-zero value. + * 3. The last packet removed from a fifo has an invalid registration id. The + * error receive function will have been called, but polling ends. + * The invalid packet is counted as a processed packet, and the return + * code from the error receive function is returned. + * 4. There have been fruitfulPollLimit polls attempted (summed across all + * fifos). + * + * Polling occurs in a round-robin fashion through the array of normal fifos in + * the group. If a fifo has a packet, the appropriate receive function is + * called. Upon return, the packet is removed from the fifo (the fifo head is + * moved past the packet). 
+ *
+ * After processing all of the packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed.  When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur until all
+ * fifos are empty or fruitfulPollLimit polls have been completed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed.  This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface.  The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage, in the fifo.  Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently.  To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory.  If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer.  In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function.
To keep a copy of the packet, the receive function would + * have to copy it to some other location. The packet header and payload are + * 16-byte aligned for optimized copying. + * + * \param[in] fg_ptr Pointer to the fifo group. + * \param[in] fruitfulPollLimit The limit on the number of fruitful polls that + * will be attempted (summed across all fifos). + * If the limit is reached, this function + * returns. A value of zero means there is no + * limit imposed. A fruitful poll is one where + * at least one packet has arrived in the fifo + * since the last poll. + * + * \retval num_packets_received The number of packets received and processed. + + * \retval negative_value The return code from the receive function that + * caused polling to end. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * + */ +int DMA_RecFifoSimplePollNormalFifos( DMA_RecFifoGroup_t *fg_ptr, + int fruitfulPollLimit) +{ + int rc = 0; /* Return code from recv_func. */ + int num_processed = 0; /* Number of packets processed */ + int num_processed_in_fifo = 0; /* Not used, but needed for calling*/ + /* wrap function. */ + int fruitfulPollCount; /* Number of fruitful polls. */ + + /* + *The following is actually a cache of the last receive function called. + * We cache it so we don't need to keep looking up the receive function + * info on each packet. + */ + DMA_RecFifoRecvFunction_t recv_func_ptr=NULL; /* Pointer to receive function*/ + void *recv_func_parm=NULL;;/* Receive function parameter */ + int recv_func_id=-1; /* Function ID from the packet */ + /* header. Init to -1 means */ + /* recv_func_ptr and */ + /* recv_func_parm do not cache */ + /* the previous packet values. */ + + DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */ + unsigned int packet_bytes; /* Number of bytes in the packet. */ + unsigned int wrap; /* 1: A wrap of the fifo is going */ + /* to occur. */ + /* 0: No wrap is going to occur. 
*/ + + /* + * Processing of packets occurs in the fifo in three phases: + * Normal Phase 1 : Packets before the wrap. + * Handle Wrap Phase: Packets during the wrap. + * Normal Phase 2 : Packets after the wrap. + */ + void *va_logical_tail; /* The point beyond which normal */ + /* processing of packets ends. */ + void *va_starting_head; /* Pointer to the first packet in */ + /* a contiguous group extracted */ + /* from the fifo. */ + void *va_nextHead; /* Pointer to the next packet to */ + /* be processed. */ + void *va_tail; /* Snapshot of the fifo's tail. */ + unsigned int num_packets_processed_since_moving_fifo_head; /* + Tells us when we should move the + hardware head. */ + + /* + * Control variables for looping through the fifos + */ + int fifo_index=0; /* Index of fifo being processed. */ + /* Start with first fifo. */ + unsigned int fifo_bit_number; /* The bit number of the fifo */ + /* being processed. Group0: 0-7, */ + /* Group1: 8-15, Group2: 16-23, */ + /* Group3: 24-31. Corresponds to */ + /* the DMA not-empty status bits. */ + int num_fifos_in_group; /* Number of fifos in this group. */ + int num_packets_in_fifo; /* Count of packets processed in a */ + /* fifo. */ + unsigned int not_empty_status=0; /* Snapshot of the not empty status*/ + /* for this group. 0 indicates */ + /* that no snapshot has occurred */ + /* yet. */ + DMA_RecFifo_t *rec_fifo_ptr; /* Pointer to reception fifo being */ + /* processed. */ + + + SPI_assert( fg_ptr != NULL ); + + num_fifos_in_group = fg_ptr->num_normal_fifos; + + /* + * Start the fruitful poll count at the max. + * For unlimited, set to a very high value. + */ + fruitfulPollCount = (fruitfulPollLimit == 0) ? 0x7FFFFFFF : fruitfulPollLimit; + + /* + * Circularly loop through the not-empty fifos in the fifo group. + * Keep going until one of the termination conditions documented in the + * prolog occurs. + * + */ + for (;;) { + /* + * Find the next fifo to process. 
+ */ + rec_fifo_ptr = &fg_ptr->fifos[fifo_index]; /* This is the fifo itself*/ + fifo_bit_number = _BN(rec_fifo_ptr->global_fifo_id);/* fifo's status bit*/ + + fifo_index = DMA_RecFifoGetNextFifo(fg_ptr, + fifo_index, + &fifo_bit_number, + 0, /* num_empty_passes */ + 0, /* not_empty_poll_delay */ + ¬_empty_status); + if (fifo_index < 0) { /* No more packets to process? */ +#if defined(BGP_DD1_WORKAROUNDS) + /* + * + * If there are no more non-empty fifos, count the number of consecutive + * times the poll function came up dry (num_processed == 0), and if it + * exceeds a threshold, issue a system call to clear the rDMA's "full + * reception fifo" condition so it begins to receive packets again. + * + * When a non-empty fifo is returned, its shadow va_tail pointer has been + * updated to reflect the amount of packet data in the fifo. + */ + if (num_processed > 0) { /* Did we process at least 1 packet? */ + NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */ + /* likely not encountered a fifo full */ + /* condition and stopped. Reset the */ + /* fifo counter so we will start */ + /* tracking empty calls to poll. */ + } + else { + if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */ + (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) { + /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */ + rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */ + /* reception fifos filled and the */ + /* DMA has stopped. */ + /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */ + NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */ + /* fill-fifo counter. */ + } + } +#endif + /* printf("Poll: returned %d processed\n",num_processed); */ + return (num_processed); + } + + num_packets_in_fifo = 0; + + /* + * Establish pointers to the reception fifo and the DMA fifo. + * Snapshot the hardware head and tail pointers...they may change while we + * are running. 
We will snapshot the tail again after processing everything + * up to this snapshot, until the fifo is empty (head == tail). + */ + rec_fifo_ptr = &(fg_ptr->fifos[fifo_index]); + DMA_Fifo_t *fifo_ptr = &(rec_fifo_ptr->dma_fifo); + void *va_head = fifo_ptr->va_head; + va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */ + /* tail. */ + num_packets_processed_since_moving_fifo_head = + rec_fifo_ptr->num_packets_processed_since_moving_fifo_head; /* Fetch */ + /* for later use. */ + +#if defined(CONFIG_BGP_STATISTICS) + { + unsigned int used_space = (fifo_ptr->va_tail >= fifo_ptr->va_head) + ? ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 ) + : (fifo_ptr->fifo_size + ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 ) ) + ; + reception_fifo_histogram[fls(used_space)] += 1 ; + } +#endif + /* + * Loop processing packets until the fifo is empty or until the fruitful poll + * limit is reached. + * At the top of the loop, we have a new snapshot of the tail, so something + * may have appeared in the fifo. + */ + while ( ( rc == 0 ) && + ( va_tail != va_head ) && + ( fruitfulPollCount > 0) ) { /* Is there something in this fifo? */ + /* Yes... */ + fruitfulPollCount--; /* Count the polls */ + + /* + * MSYNC before we look at the data in the fifo to ensure that snoops + * issued by the DMA have completed. This ensures the L1 cache + * invalidations have completed so we don't look at stale data. + */ + _bgp_msync(); + + /* + * Touch the first packet right away so it is is loaded into the memory + * cache before we try to use it. + */ + _bgp_dcache_touch_line( va_head ); + + /* + * Prepare to split up the processing between "normal" and "handleWrap". + * Establish a "logicalTail" which is the point beyond which "normal" + * processing changes to "handleWrap" processing. + */ + if ( va_head < va_tail ) { /* No wrap will occur? 
*/ + wrap = 0; + va_logical_tail = va_tail; /* Logical tail is the physical tail */ + } + else { /* Wrap will occur. Logical tail is 256 bytes before the end + * of the fifo. We need to stop normal phase 1 there because + * that is the first point at which the next packet could wrap. + */ + wrap = 1; + va_logical_tail = (void*)( ((unsigned)fifo_ptr->va_end) - 256 ); + } + + /* Loop processing packets until we hit our tail snapshot */ + while ( ( rc == 0 ) && + ( va_head != va_tail ) ) { + /* + * Process packets that do not wrap. This is everything up to the + * logical tail. This gets executed both before and after wrapping. + * This is normal phase 1 and normal phase 2. + */ + va_starting_head = va_head; + + while ( ( rc == 0 ) && + ( va_head < va_logical_tail ) ) { + + packet_ptr = (DMA_PacketHeader_t*)va_head; + packet_bytes = (packet_ptr->Chunks + 1) << 5; + + /* + * Touch the NEXT packet to ensure it will be in L1 cache when we + * are ready for it on the next iteration. Even though the packet will + * likely be touched in its entirety by the receive function, and that + * will likely cause the processor to perform prefetching of the next + * packet, bringing in the next packet now has been shown to improve + * bandwidth from 1.41 bytes/cycle to 1.44 bytes/cycle, so we put + * this dcbt here. + */ + va_nextHead = (void*) ( (unsigned)va_head + packet_bytes ); + + if ( va_nextHead < va_logical_tail ) + _bgp_dcache_touch_line( va_nextHead ); + + /* + * Determine the receive function to call. + * The packet header Func_Id contains the ID of the function to call. + * We cache the previous packet's values because it is likely this + * packet will be the same. If not, call out of line function to + * re-prime the cache. 
+ */ + if ( packet_ptr->Func_Id != recv_func_id ) { + recv_func_id = packet_ptr->Func_Id; + DMA_RecFifoPollPrimeRecvFuncCache( recv_func_id, + &recv_func_ptr, + &recv_func_parm ); + } + + /* Call the receive function, and no matter what happens, increment + * the number of packets processed and move our head snapshot to the + * next packet. + */ + if( recv_func_ptr) + { + rc = (*recv_func_ptr)( rec_fifo_ptr, + packet_ptr, + recv_func_parm, + (char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)), + packet_bytes - sizeof(DMA_PacketHeader_t) ); + } + else + { + printk(KERN_ERR "DMA_RecFifoSimplePollNormalFifos recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n", + recv_func_id,rec_fifo_ptr,packet_ptr,recv_func_parm,(char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),packet_bytes - sizeof(DMA_PacketHeader_t)) ; + + } + num_packets_processed_since_moving_fifo_head++; + num_packets_in_fifo++; + +#ifdef DEBUG_PRINT + printf("SimplePollById: num_processed=%d, va_head=0x%08x, va_tail=0x%08x, va_logical_tail=0x%08x, va_end=0x%08x, willWrap=%d\n",num_processed,(unsigned)va_head,(unsigned)va_tail,(unsigned)va_logical_tail,(unsigned)fifo_ptr->va_end,wrap); +#endif + + va_head = va_nextHead; + + } /* End: Process packets that do not wrap */ + + /* + * We are done processing all packets prior to the wrap. + * If the shadow va_head is not in sync with the hardware head, or if + * we are going to wrap, sync up the hardware head and recalculate the + * free space. The movement of the head causes the fifo's free space + * to be recalculated. + * + * The wrap function requires that the shadow and hardware heads be in + * sync. If we are not wrapping, we condition the syncing of the heads + * on whether we have exceeded our limit on the number of packets we + * processed in a fifo since the last time we moved the + * hardware head. 
If we have only processed a few packets, we just + * leave the hardware head where it is and don't incur the expense of + * moving the hardware head. If we have processed at least our limit + * of packets, then it is good to move the hardware head. + */ + if ( ( num_packets_processed_since_moving_fifo_head > + DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD ) || + ( wrap ) ) { + + DMA_FifoSetHead( fifo_ptr, va_head ); + + num_packets_processed_since_moving_fifo_head = 0; + } + + /* + * If we are anticipating a wrap, go handle the wrap. + */ + if ( ( rc == 0 ) && wrap ) { + /* + * Handle the wrapping of the fifo. This requires extra checking + * and moving of the head, and thus is in its own function. + * It is a generic function, used by other poll functions. Some of + * these other poll functions have the ability to quit processing + * packets when a specified limit is reached overall, or per fifo. + * That is what the last two parameters specify. For this poll + * function, we don't have any limit...we process packets until the + * fifo is empty, so we pass in large unreachable limits. + */ + rc = DMA_RecFifoPollProcessWrap ( + rec_fifo_ptr, + &va_head, + va_tail, + &num_processed, + &num_processed_in_fifo, + 0x7FFFFFFF, /* Infinite packet limit, overall */ + 0x7FFFFFFF);/* Infinite packet limit per fifo */ + + va_logical_tail = va_tail; /* Set to actual tail now. */ + wrap = 0; /* Next time around, don't do wrap processing. */ + } + + } /* End: Process packets until we hit our snapshotted tail */ + +#if defined(BGP_DD1_WORKAROUNDS) + NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */ + /* likely not encountered a fifo full */ + /* condition and stopped. Reset the */ + /* fifo counter so we will start */ + /* tracking empty calls to poll. */ +#endif + + va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */ + /* tail again. */ + + } /* End: Loop while there is something in the fifo */ + + /* + * The fifo is now empty. 
If we have processed at least one packet, + * return the number, or if the receive function returned an error, + * return that return code. + */ + if ( num_packets_in_fifo > 0 ) { + /* Store in the fifo structure the number of packets processed since + * last moving the hardware head, and the current head */ + rec_fifo_ptr->num_packets_processed_since_moving_fifo_head = + num_packets_processed_since_moving_fifo_head; + fifo_ptr->va_head = va_head; + num_processed += num_packets_in_fifo; + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. + */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + _BN(rec_fifo_ptr->global_fifo_id), + 0 ); + + /* If the receive function returned an error, exit with that error now */ + if ( rc ) return (rc); + } + /* + * We exited the loop processing the fifo_index fifo. + * - If we exited because the fifo was empty according to our snapshot + * of the fifo's tail (head == tail snapshot), we want to turn off this + * fifo's not-empty status in our shadow copy of the status so we + * process all of the other fifos before re-fetching the true status and + * tail for this fifo, giving this fifo another chance. + */ + not_empty_status &= ~(fifo_bit_number); + +#ifdef DEBUG_PRINT + printf("PollNormal: Turning off status bit 0x%08x, status=0x%08x\n",fifo_bit_number,not_empty_status); +#endif + + /* Bump to next fifo */ + fifo_index = (fifo_index+1) % num_fifos_in_group; + + } /* End: for loop processing reception fifos */ + +} /* End: DMA_RecFifoSimplePollNormalFifos() */ + + + + +/*! + * \brief Simple Poll Normal Reception Fifo Given a Fifo Group and Fifo ID + * + * Poll the specified "normal" reception fifo in the specified fifo group, + * removing one packet after another from the fifo, dispatching the appropriate + * receive function for each packet, until one of the following occurs: + * 1. All packets in the fifo have been received. + * 2. The specified fifo is empty. + * 3. 
A receive function returns a non-zero value. + * 4. The last packet removed from the fifo has an invalid registration id. The + * error receive function will have been called, but polling ends. + * The invalid packet is counted as a processed packet, and the return + * code from the error receive function is returned. + * 5. There have been fruitfulPollLimit polls attempted. + * + * If the specified fifo has a packet, the appropriate receive function is + * called. Upon return, the packet is removed from the fifo (the fifo head is + * moved past the packet). + * + * After processing all of the packets in the fifo (emptying that fifo), + * or the fruitfulPollLimit has been reached, the function returns the number + * of packets processed. + * + * It is risky to set the fruitfulPollLimit to zero, allowing this function to + * poll indefinitely as long as there are packets to be processed. This may + * starve the node in a scenario where other nodes send "polling" packets to + * our node, and our node never gets a chance to do anything else except + * process those polling packets. + * + * The receive functions must be registered through the + * DMA_RecFifoRegisterRecvFunction interface. The receive function is + * called with a pointer to the packet header, pointer to the payload, and + * length of the payload. The packet header is always be 16 bytes of + * contiguous storage, in the fifo. Because the fifo is a circular buffer, + * the payload of a packet may wrap from the end of the fifo to the beginning. + * For large fifos, this happens infrequently. To make it easier for + * user/messaging code, the poll function will always pass a starting payload + * address and number of bytes so that the receive function can treat the packet + * as contiguous storage in memory. If the packet does not wrap, the starting + * payload address will be a pointer to the appropriate address in the fifo. 
+ * If the packet does wrap, the poll function will copy bytes from the fifo to + * a contiguous buffer (on the stack) and call the receive function with a + * payload pointer pointing to this temporary buffer. In either case, when the + * receive function returns, user code cannot assume that the payload buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function has + * to copy it to some other location. The packet header and payload are + * 16-byte aligned for optimized copying. + * + * \param[in] fifo_id The ID of the fifo to be polled. + * (0 through + * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1). + * \param[in] fg_ptr Pointer to the fifo group. + * \param[in] fruitfulPollLimit The limit on the number of fruitful polls that + * will be attempted. + * If the limit is reached, this function + * returns. A value of zero means there is no + * limit imposed. A fruitful poll is one where + * at least one packet has arrived in the fifo + * since the last poll. + * + * \retval num_packets_received The number of packets received and processed. + * \retval negative_value The return code from the receive function that + * caused polling to end. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * + */ +int DMA_RecFifoSimplePollNormalFifoById( int fifo_id, + DMA_RecFifoGroup_t *fg_ptr, + int fruitfulPollLimit + ) +{ + int rc = 0; /* Return code from recv_func. */ + int num_processed = 0; /* Number of packets processed */ + int num_processed_in_fifo = 0; /* Not used, but needed for calling*/ + /* wrap function. */ + int fruitfulPollCount; /* Number of fruitful polls. */ + + /* + *The following is actually a cache of the last receive function called. + * We cache it so we don't need to keep looking up the receive function + * info on each packet. 
+ */ + DMA_RecFifoRecvFunction_t recv_func_ptr=NULL; /* Pointer to receive function*/ + void *recv_func_parm=NULL;/* Receive function parameter */ + int recv_func_id=-1; /* Function ID from the packet */ + /* header. Init to -1 means */ + /* recv_func_ptr and */ + /* recv_func_parm do not cache */ + /* the previous packet values. */ + + DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */ + unsigned int packet_bytes; /* Number of bytes in the packet. */ + unsigned int wrap; /* 1: A wrap of the fifo is going */ + /* to occur. */ + /* 0: No wrap is going to occur. */ + + /* + * Processing of packets occurs in the fifo in three phases: + * Normal Phase 1 : Packets before the wrap. + * Handle Wrap Phase: Packets during the wrap. + * Normal Phase 2 : Packets after the wrap. + */ + void *va_logical_tail; /* The point beyond which normal */ + /* processing of packets ends. */ + void *va_starting_head; /* Pointer to the first packet in */ + /* a contiguous group extracted */ + /* from the fifo. */ + void *va_nextHead; /* Pointer to the next packet to */ + /* be processed. */ + void *va_tail; /* Snapshot of the fifo's tail. */ + unsigned int num_packets_processed_since_moving_fifo_head; /* + Tells us when we should move the + hardware head. */ + + SPI_assert( fg_ptr != NULL ); + SPI_assert( ( fifo_id >= 0 ) && + ( fifo_id < DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP ) ); + /* + * Start the fruitful poll count at the max. + * For unlimited, set to a very high value. + */ + fruitfulPollCount = (fruitfulPollLimit == 0) ? 0x7FFFFFFF : fruitfulPollLimit; + + /* + * Establish pointers to the reception fifo and the DMA fifo. + * Snapshot the hardware head and tail pointers...they may change while we + * are running. We will snapshot the tail again after processing everything + * up to this snapshot, until the fifo is empty (head == tail). 
+ */ + DMA_RecFifo_t *rec_fifo_ptr = &(fg_ptr->fifos[fifo_id]); + DMA_Fifo_t *fifo_ptr = &(rec_fifo_ptr->dma_fifo); + void *va_head = fifo_ptr->va_head; + va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */ + /* tail. */ + num_packets_processed_since_moving_fifo_head = + rec_fifo_ptr->num_packets_processed_since_moving_fifo_head; /* Fetch */ + /* for later use. */ + +#if defined(CONFIG_BGP_STATISTICS) + { + unsigned int used_space = (fifo_ptr->va_tail >= fifo_ptr->va_head) + ? ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 ) + : (fifo_ptr->fifo_size + ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 ) ) + ; + reception_fifo_histogram[fls(used_space)] += 1 ; + } +#endif + /* + * Loop processing packets until the fifo is empty or the fruitfulPollLimit + * has been reached. + * At the top of the loop, we have a new snapshot of the tail, so something + * may have appeared in the fifo. + */ + while ( ( rc == 0 ) && + ( va_tail != va_head ) && + ( fruitfulPollCount > 0 ) ) { /* Is there something in this fifo? */ + /* Yes... */ + fruitfulPollCount--; /* Count the polls */ + + /* + * MSYNC before we look at the data in the fifo to ensure that snoops + * issued by the DMA have completed. This ensures the L1 cache + * invalidations have completed so we don't look at stale data. + */ + _bgp_msync(); + + /* + * Touch the first packet right away so it is is loaded into the memory + * cache before we try to use it. + */ + _bgp_dcache_touch_line( va_head ); + + /* + * Prepare to split up the processing between "normal" and "handleWrap". + * Establish a "logicalTail" which is the point beyond which "normal" + * processing changes to "handleWrap" processing. + */ + if ( va_head < va_tail ) { /* No wrap will occur? */ + wrap = 0; + va_logical_tail = va_tail; /* Logical tail is the physical tail */ + } + else { /* Wrap will occur. Logical tail is 256 bytes before the end + * of the fifo. 
We need to stop normal phase 1 there because + * that is the first point at which the next packet could wrap. + */ + wrap = 1; + va_logical_tail = (void*)( ((unsigned)fifo_ptr->va_end) - 256 ); + } + + /* Loop processing packets until we hit our tail snapshot */ + while ( ( rc == 0 ) && + ( va_head != va_tail ) ) { + /* + * Process packets that do not wrap. This is everything up to the + * logical tail. This gets executed both before and after wrapping. + * This is normal phase 1 and normal phase 2. + */ + va_starting_head = va_head; + + while ( ( rc == 0 ) && + ( va_head < va_logical_tail ) ) { + + packet_ptr = (DMA_PacketHeader_t*)va_head; + packet_bytes = (packet_ptr->Chunks + 1) << 5; + + /* + * Touch the NEXT packet to ensure it will be in L1 cache when we + * are ready for it on the next iteration. Even though the packet will + * likely be touched in its entirety by the receive function, and that + * will likely cause the processor to perform prefetching of the next + * packet, bringing in the next packet now has been shown to improve + * bandwidth from 1.41 bytes/cycle to 1.44 bytes/cycle, so we put + * this dcbt here. + */ + va_nextHead = (void*) ( (unsigned)va_head + packet_bytes ); + + if ( va_nextHead < va_logical_tail ) + _bgp_dcache_touch_line( va_nextHead ); + + /* + * Determine the receive function to call. + * The packet header Func_Id contains the ID of the function to call. + * We cache the previous packet's values because it is likely this + * packet will be the same. If not, call out of line function to + * re-prime the cache. + */ + if ( packet_ptr->Func_Id != recv_func_id ) { + recv_func_id = packet_ptr->Func_Id; + DMA_RecFifoPollPrimeRecvFuncCache( recv_func_id, + &recv_func_ptr, + &recv_func_parm ); + } + + /* Call the receive function, and no matter what happens, increment + * the number of packets processed and move our head snapshot to the + * next packet. 
+ */ + SPI_assert ( recv_func_ptr != NULL ); + + if( recv_func_ptr) + { + rc = (*recv_func_ptr)( rec_fifo_ptr, + packet_ptr, + recv_func_parm, + (char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)), + packet_bytes - sizeof(DMA_PacketHeader_t) ); + } + else + { + printk(KERN_ERR "DMA_RecFifoSimplePollNormalFifoById recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n", + recv_func_id,rec_fifo_ptr,packet_ptr,recv_func_parm,(char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),packet_bytes - sizeof(DMA_PacketHeader_t)) ; + + } + num_processed++; + num_packets_processed_since_moving_fifo_head++; + +#ifdef DEBUG_PRINT + printf("SimplePollById: num_processed=%d, va_head=0x%08x, va_tail=0x%08x, va_logical_tail=0x%08x, va_end=0x%08x, willWrap=%d\n",num_processed,(unsigned)va_head,(unsigned)va_tail,(unsigned)va_logical_tail,(unsigned)fifo_ptr->va_end,wrap); +#endif + + va_head = va_nextHead; + + } /* End: Process packets that do not wrap */ + + /* + * We are done processing all packets prior to the wrap. + * If the shadow va_head is not in sync with the hardware head, or if + * we are going to wrap, sync up the hardware head and recalculate the + * free space. The movement of the head causes the fifo's free space + * to be recalculated. + * + * The wrap function requires that the shadow and hardware heads be in + * sync. If we are not wrapping, we condition the syncing of the heads + * on whether we have exceeded our limit on the number of packets we + * processed in a fifo since the last time we moved the + * hardware head. If we have only processed a few packets, we just + * leave the hardware head where it is and don't incur the expense of + * moving the hardware head. If we have processed at least our limit + * of packets, then it is good to move the hardware head. 
+ */ + if ( ( num_packets_processed_since_moving_fifo_head > + DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD ) || + ( wrap ) ) { + + DMA_FifoSetHead( fifo_ptr, va_head ); + + num_packets_processed_since_moving_fifo_head = 0; + } + + /* + * If we are anticipating a wrap, go handle the wrap. + */ + if ( ( rc == 0 ) && wrap ) { + /* + * Handle the wrapping of the fifo. This requires extra checking + * and moving of the head, and thus is in its own function. + * It is a generic function, used by other poll functions. Some of + * these other poll functions have the ability to quit processing + * packets when a specified limit is reached overall, or per fifo. + * That is what the last two parameters specify. For this poll + * function, we don't have any limit...we process packets until the + * fifo is empty, so we pass in large unreachable limits. + */ + rc = DMA_RecFifoPollProcessWrap ( + rec_fifo_ptr, + &va_head, + va_tail, + &num_processed, + &num_processed_in_fifo, + 0x7FFFFFFF, /* Infinite packet limit, overall */ + 0x7FFFFFFF);/* Infinite packet limit per fifo */ + + va_logical_tail = va_tail; /* Set to actual tail now. */ + wrap = 0; /* Next time around, don't do wrap processing. */ + } + + } /* End: Process packets until we hit our snapshotted tail */ + +#if defined(BGP_DD1_WORKAROUNDS) + NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */ + /* likely not encountered a fifo full */ + /* condition and stopped. Reset the */ + /* fifo counter so we will start */ + /* tracking empty calls to poll. */ +#endif + + va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */ + /* tail again. */ + + } /* End: Loop while there is something in the fifo */ + + /* + * The fifo is now empty. If we have processed at least one packet, + * return the number, or if the receive function returned an error, + * return that return code. + * Also, clear the reception fifo threshold crossed interrupt condition. 
+ */ + if ( num_processed > 0 ) { + /* Store in the fifo structure the number of packets processed since + * last moving the hardware head, and the current head */ + rec_fifo_ptr->num_packets_processed_since_moving_fifo_head = + num_packets_processed_since_moving_fifo_head; + fifo_ptr->va_head = va_head; + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + _BN(rec_fifo_ptr->global_fifo_id), + 0 ); + + if ( rc == 0 ) return (num_processed); + else return (rc); + } + + /* + * We didn't process any packets. This could be because the rDMA has + * shut-down (a DD1 hardware behavior) because the reception fifo became full. + * We count the number of times we consecutively come up empty, and reactivate + * the rDMA via a system call. + */ + else { + +#if defined(BGP_DD1_WORKAROUNDS) + if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */ + (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) { + /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */ + rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */ + /* reception fifos filled and the */ + /* DMA has stopped. */ + /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */ + NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */ + /* fill-fifo counter. */ + } +#endif + + return (0); /* Return no packets processed */ + } + +} /* End: DMA_RecFifoSimplePollNormalFifoById() */ + + +/*! + * \brief Poll Header Reception Fifo Given a Fifo Group + * + * Poll the "header" reception fifo in the specified fifo group, + * removing one packet after another from the fifo, dispatching the appropriate + * receive function for each packet, until one of the following occurs: + * 1. Total_packets packets are received + * 2. The specified fifo is empty + * 3. A receive function returns a non-zero value + * + * If the header fifo has a packet, the appropriate receive function is + * called. 
Upon return, the packet is removed from the fifo (the fifo head is + * moved past the packet). + * + * After processing num_packets packets in the fifo (or emptying that fifo), + * the function returns the number of packets processed. + * + * The receive function must be registered through the + * DMA_RecFifoRegisterRecvFunction interface. The receive function is + * called with a pointer to the packet header. The packet header is always + * 16 bytes of contiguous storage, in the fifo. When the + * receive function returns, user code cannot assume that the buffer is + * permanent, i.e., after return, it may be overwritten by either the DMA or + * the poll function. To keep a copy of the packet, the receive function would + * have to copy it to some other location. The packet header is 16-byte aligned + * for optimized copying. + * + * \param[in] num_packets The maximum number of packets that will be + * processed. + * \param[in] num_empty_passes When the not-empty status indicates that all + * fifos in the group are emtpy, this is the + * number of times the not-empty status is + * re-fetched and re-checked before officially + * declaring that they are indeed empty. + * (0 means no extra passes are made). + * \param[in] not_empty_poll_delay The number of pclks to delay between polls + * of the not-empty status when the fifos are + * empty. + * \param[in] fg_ptr Pointer to the fifo group. + * + * \retval num_packets_received The number of packets received and processed. + * \retval negative_value The return code from the receive function that + * caused polling to end. + * + * \pre The caller is responsible for disabling interrupts before invoking this + * function. + * + */ +int DMA_RecFifoPollHeaderFifo( int num_packets, + int num_empty_passes, + int not_empty_poll_delay, + DMA_RecFifoGroup_t *fg_ptr + ) +{ + int fifo_index; /* Index of fifo being processed */ + int num_packets_in_fifo; /* Count of packets processed in a */ + /* fifo. 
*/ + unsigned int status; /* Snapshot of the not empty status*/ + /* for this group. */ + int rc = 0; /* Return code from recv_func. */ + int num_processed = 0; /* Number of packets processed */ + DMA_PacketIovec_t io_vec; /* Payload I/O vector */ + DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function */ + void *recv_func_parm;/* Receive function parameter */ + DMA_RecFifo_t *fifo_ptr; /* Pointer to fifo being processed */ + DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */ + int passes; /* Counter of not-empty passes */ + + SPI_assert( num_packets > 0 ); + SPI_assert( num_empty_passes >= 0 ); + SPI_assert( fg_ptr != NULL ); + + + fifo_index = DMA_HEADER_REC_FIFO_ID; /* We are working with the header */ + /* fifo. */ + fifo_ptr = &(fg_ptr->fifos[fifo_index]); + + /* + * Loop until the header fifo is declared empty, or + * until one of the termination conditions documented in the prolog occurs. + * + */ + for (;;) + { + /* + * If the DMA SRAM not-empty status for this fifo is zero (the fifo is + * empty), the status is checked num_empty_passes times with a slight + * delay in between to give the DMA time to make progress before declaring + * that the fifo is truely empty. + */ + passes = num_empty_passes; + status = DMA_RecFifoGetNotEmptyById( fg_ptr, + fifo_index ); /* Get Header fifo */ + /* not-empty status. */ + while ( ( status == 0 ) && + ( num_empty_passes-- > 0 ) ) + { + /* Delay, allowing the DMA to update its status */ + unsigned int pclks = not_empty_poll_delay; + while( pclks-- ) + { + asm volatile("nop;"); + } + + /* Re-fetch the not-empty status */ + status = DMA_RecFifoGetNotEmptyById( + fg_ptr, + fifo_index ); /* Get Header fifo */ + /* not-empty status.*/ + } + + if ( status == 0 ) { /* Fifo is empty? */ + +#if defined(BGP_DD1_WORKAROUNDS) + if (num_processed > 0) { /* Did we process at least 1 packet? */ + NumEmptyPollFunctionCalls = 0; /* The DMA must be active. 
It has */ + /* likely not encountered a fifo full */ + /* condition and stopped. Reset the */ + /* fifo counter so we will start */ + /* tracking empty calls to poll. */ + } + else { + if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */ + (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) { + /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */ + rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */ + /* reception fifos filled and the */ + /* DMA has stopped. */ + /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */ + NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */ + /* fill-fifo counter. */ + } + } +#endif + + return (num_processed); + } + + /* The fifo has something in it. + * Update its shadow va_tail pointer to reflect the amount of packet + * data in the fifo. + */ + DMA_RecFifoGetTailById( fg_ptr, + fifo_index ); + + num_packets_in_fifo = 0; + + /* + * MSYNC before we look at the data in the fifo to ensure that snoops + * issued by the DMA have completed. This ensures the L1 cache + * invalidations have completed so we don't look at stale data. + */ + _bgp_msync(); + + /* + * Within a fifo: The area between the va_head and va_tail shadow pointers + * contains packets to be processed. Loop, processing those packets until + * we have processed packets_per_fifo of them, or all of them, or other + * issues come up. + * + */ + while ( ( num_packets_in_fifo < num_packets ) && + ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) ) + { + DMA_RecFifoGetAddresses( fifo_ptr, + &io_vec ); /* Get the payload pointer(s) */ + /* for the packet at the head */ + /* of the fifo. 
*/ + + packet_ptr = (DMA_PacketHeader_t*) + fifo_ptr->dma_fifo.va_head; /* Point to packet header*/ + + /* Determine the receive function to call */ + recv_func_ptr = DMA_RecFifoInfo.headerRecvFunction; + if ( recv_func_ptr != NULL ) + { + recv_func_parm = DMA_RecFifoInfo.headerRecvFunctionParm; + } + else + { + recv_func_ptr = DMA_RecFifoInfo.errorRecvFunction; + recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm; + } + + /* Call the receive function */ + if( recv_func_ptr) + { + rc = (*recv_func_ptr)(fifo_ptr, + packet_ptr, + recv_func_parm, + NULL, /* No payload */ + 0); /* No payload bytes */ + } + else + { + printk(KERN_ERR "DMA_RecFifoPollHeaderFifo recv_func_ptr was NULL rfifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n", + fifo_ptr,packet_ptr,recv_func_parm,NULL,0) ; + + } + + DMA_RecFifoIncrementHead(fifo_ptr, + 1);/* Increment head by 16 bytes */ + + num_processed++; + + if ( rc != 0 ) /* Did receive function fail? */ + { + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. + */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + _BN(fifo_ptr->global_fifo_id), + 0 ); + return (rc); /* Yes...return that return code */ + } + + if ( num_processed >= num_packets ) /* Got what they wanted? */ + { + /* Clear the threshold crossed condition, in case we have gone below + * the threshold. + */ + DMA_RecFifoSetClearThresholdCrossed( fg_ptr, + _BN(fifo_ptr->global_fifo_id), + 0 ); + return (num_processed); /* Yes...all done */ + } + + num_packets_in_fifo++; + + } /* End: Process up to packets_per_fifo packets in this fifo */ + + } /* End: Keep looping through the fifo. 
*/ + +} /* End: DMA_RecFifoPollHeaderFifo() */ + +EXPORT_SYMBOL(DMA_RecFifoRegisterRecvFunction) ; +EXPORT_SYMBOL(DMA_RecFifoGetFifoGroup) ; +EXPORT_SYMBOL(DMA_RecFifoPollNormalFifoById) ; +#if defined(CONFIG_BGP_STATISTICS) +EXPORT_SYMBOL(reception_fifo_histogram) ; +EXPORT_SYMBOL(reception_hi_watermark) ; +#endif diff --git a/drivers/char/Makefile b/drivers/char/Makefile index 9caf5b5ad1c05b..6585ac4b75ee3c 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -111,6 +111,8 @@ obj-$(CONFIG_PS3_FLASH) += ps3flash.o obj-$(CONFIG_JS_RTC) += js-rtc.o js-rtc-y = rtc.o +obj-$(CONFIG_BGP) += bluegene_console.o bluegene_networks.o + # Files generated that shall be removed upon make clean clean-files := consolemap_deftbl.c defkeymap.c diff --git a/drivers/char/bluegene_console.c b/drivers/char/bluegene_console.c new file mode 100644 index 00000000000000..8bbfe64bcca0bc --- /dev/null +++ b/drivers/char/bluegene_console.c @@ -0,0 +1,805 @@ +/* + * Blue Gene Console over JTAG. + * + * (C) Copyright IBM Corp. 2003,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + * Author: Todd Inglett <tinglett@vnet.ibm.com> + * + * + */ + +#include <linux/unistd.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/console.h> +#include <linux/major.h> +#include <linux/kernel.h> +#include <linux/tty.h> +#include <linux/tty_flip.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/kbd_kern.h> +#include <linux/errno.h> +#include <asm/uaccess.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/sysrq.h> +#include <linux/syscalls.h> +#include <linux/proc_fs.h> + +#include <asm/bluegene.h> +#include <asm/bluegene_ras.h> + +static struct proc_dir_entry *proc_ras; /* /proc/ras */ +static struct proc_dir_entry *proc_ras_ascii; /* /proc/ras_ascii */ + + +/* ToDo: figure out what to do with bgprintf... */ +#define bgprintf udbg_printf +#include <asm/udbg.h> + + +#define BLUEGENE_MAJOR 229 +#define BLUEGENE_MINOR 0 + + +typedef struct _BG_CONSOLE { + struct tty_struct* tty; + spinlock_t ttyLock; + struct tty_driver* ttyDriver; +#define BG_OUTBOX_BUFF_SIZE 8192 + unsigned char outboxBuff[BG_OUTBOX_BUFF_SIZE]; + spinlock_t outboxLock; +#define BG_RAS_MAGIC_CHAR ((unsigned char) 0xff) +#define BG_RAS_TYPE_BINARY ((unsigned char) 0x82) +#define BG_RAS_TYPE_ASCII ((unsigned char) 0x88) +#define BG_OUTBOX_MSG_SIZE 248 + unsigned int outboxHead; + unsigned int outboxTail; + unsigned int outboxMsgAge; +#define BG_OUTBOX_MAX_AGE 36 + unsigned int outboxRetry; +#define BG_OUTBOX_MAX_RETRY 2 + int outboxMsgSent; + struct task_struct* kmboxdTask; + /* Wait queue to wakeup kmboxd. For now it runs strictly on timeout (polling), + * but in the future an interrupt or other means could wake it. 
+ */ + wait_queue_head_t wait; +} BG_CONSOLE; + + +static BG_CONSOLE bgc = { + .tty = NULL, + .ttyLock = SPIN_LOCK_UNLOCKED, + .ttyDriver = NULL, + .outboxLock = SPIN_LOCK_UNLOCKED, + .outboxHead = 0, + .outboxTail = 0, + .outboxMsgAge = 0, + .outboxRetry = 0, + .outboxMsgSent = 0, + .kmboxdTask = NULL, +}; + + +#define BG_OUTBOX_HEAD_INCREMENT(i) bgc.outboxHead = (bgc.outboxHead + (i)) % BG_OUTBOX_BUFF_SIZE +#define BG_OUTBOX_TAIL_INCREMENT(i) bgc.outboxTail = (bgc.outboxTail + (i)) % BG_OUTBOX_BUFF_SIZE + + +/* How many bytes of outbox buffer space are in use. The caller must be */ +/* holding the outbox lock. */ +static inline int __bgOutboxBufferUsed(void) +{ + int rc = 0; + + if (bgc.outboxHead <= bgc.outboxTail) + rc = bgc.outboxTail - bgc.outboxHead; + else + rc = BG_OUTBOX_BUFF_SIZE - bgc.outboxHead + bgc.outboxTail; + + return rc; +} + + +/* How many bytes of buffer space are in use. */ +static inline int bgOutboxBufferUsed(struct tty_struct* tty) +{ + unsigned long flags; + int rc; + + spin_lock_irqsave(&bgc.outboxLock, flags); + rc = __bgOutboxBufferUsed(); + spin_unlock_irqrestore(&bgc.outboxLock, flags); + + return rc; +} + + +/* How many bytes of outbox buffer space are unused. The caller must be */ +/* holding the outbox lock. */ +static inline int __bgOutboxBufferFree(void) +{ + int rc; + + if (bgc.outboxHead > bgc.outboxTail) + rc = bgc.outboxHead - bgc.outboxTail; + else + rc = BG_OUTBOX_BUFF_SIZE - bgc.outboxTail + bgc.outboxHead; + + return rc; +} + + +/* How many bytes of buffer space are free. */ +static inline int bgOutboxBufferFree(struct tty_struct* tty) +{ + int rc; + unsigned long flags; + + spin_lock_irqsave(&bgc.outboxLock, flags); + rc = __bgOutboxBufferFree(); + spin_unlock_irqrestore(&bgc.outboxLock, flags); + + return rc; +} + + +/* Append the specified data to the outbox buffer. 
*/ +static inline int __bgOutboxBufferAppend(unsigned char* data, + unsigned int dataLen) +{ + int i = 0; + + while ((!dataLen && data[i]) || i < dataLen) { + bgc.outboxBuff[bgc.outboxTail] = data[i++]; + if ((bgc.outboxTail + 1) % BG_OUTBOX_BUFF_SIZE != bgc.outboxHead) + bgc.outboxTail = (bgc.outboxTail + 1) % BG_OUTBOX_BUFF_SIZE; + else + break; + } + + return i; +} + + +/* Remove the specified number of bytes from the outbox buffer. */ +static inline int __bgOutboxBufferRemove(unsigned char* data, + unsigned int dataLen) +{ + int i = 0; + + while (bgc.outboxHead != bgc.outboxTail && i < dataLen) { + data[i++] = bgc.outboxBuff[bgc.outboxHead]; + bgc.outboxHead = (bgc.outboxHead + 1) % BG_OUTBOX_BUFF_SIZE; + } + + return i; +} + + +/* Search for the end of the line, starting at the specified index for the specified maximum length. */ +/* The end of a line is defined by the presence of a newline character or the RAS magic character or */ +/* the end of the buffer. The number of bytes in the line are returned and 'index' is set to the */ +/* buffer index of the last character in the line. If no line can be found zero is returned and */ +/* 'index' is set to the buffer index of the last character examined. The caller must ensure that */ +/* the outbox is locked. */ +inline static int __bgOutboxBuffFindEOL(unsigned int* index, unsigned int maxLen) { + int rc; + int i = *index; + int limit; + int foundRAS = 0; + + /* Determine the limit of the search. */ + limit = (*index + maxLen - 1 < BG_OUTBOX_BUFF_SIZE - 1 ? *index + maxLen - 1 : BG_OUTBOX_BUFF_SIZE - 1); + if (bgc.outboxTail > *index && limit > bgc.outboxTail -1) + limit = bgc.outboxTail - 1; + + /* Search for a newline. 
*/ + while (i < limit && bgc.outboxBuff[i] != '\n') { + if (bgc.outboxBuff[i] == BG_RAS_MAGIC_CHAR) { + unsigned char nextChar = bgc.outboxBuff[(i+1) % BG_OUTBOX_BUFF_SIZE]; + + if ((nextChar == BG_RAS_TYPE_BINARY || nextChar == BG_RAS_TYPE_ASCII) && + (i+1) % BG_OUTBOX_BUFF_SIZE != bgc.outboxTail) { + foundRAS = 1; + break; + } + } + i++; + } + if (bgc.outboxBuff[i] == '\n') { + /* Found the end of a line. */ + rc = i - *index + 1; + *index = i; + } else if (foundRAS) { + /* Ran into a RAS message so end the line. */ + rc = i - *index;; + *index = i - 1; + } else { + /* Reached the search limit. */ + rc = 0; + *index = i; + } + + return rc; +} + + +/* Send any buffered messages so long as the outbox is ready. This function assumes that the caller is */ +/* holding the outbox buffer lock. */ +int __bgFlushOutboxMsgs(void) +{ + int rc = 0; + + /* Send buffered outbox messages as long as there is something to send and the mailbox is ready. */ + while (bgc.outboxHead != bgc.outboxTail && !bluegene_testForOutboxCompletion()) { + unsigned char nextChar = bgc.outboxBuff[(bgc.outboxHead + 1) % BG_OUTBOX_BUFF_SIZE]; + + /* We have a message to send. Is it RAS or a console message? */ + if (bgc.outboxBuff[bgc.outboxHead] == BG_RAS_MAGIC_CHAR && + (nextChar == BG_RAS_TYPE_BINARY || nextChar == BG_RAS_TYPE_ASCII) && + (__bgOutboxBufferUsed() >= sizeof(bg_ras) + 2)) { + /* Send a RAS message to the outbox. */ + bg_ras ras; + int rc; + + /* Copy the RAS information out of the buffer into a form we can easily deal with. */ + BG_OUTBOX_HEAD_INCREMENT(2); + rc = __bgOutboxBufferRemove((unsigned char*) &ras, sizeof(ras)); + + /* Send the RAS. */ + do { + if (nextChar == BG_RAS_TYPE_BINARY) { + /* Send binary RAS to the outox. */ + bgc.outboxMsgSent = !bluegene_writeRASEvent_nonBlocking(ras.comp, ras.subcomp, ras.code, + ras.length / sizeof(int), (int*) ras.data); + } else if (nextChar == BG_RAS_TYPE_ASCII) { + /* Send ASCII RAS. 
*/ + int sent = bluegene_writeRASString_nonBlocking(ras.comp, ras.subcomp, ras.code, ras.data); + + bgc.outboxMsgSent = (sent == 0 || sent == -2); + } else { + bgprintf("Unknown RAS msg type %d\n", nextChar); + break; + } + } while (!bgc.outboxMsgSent && bgc.outboxRetry++ < BG_OUTBOX_MAX_RETRY); + if (!bgc.outboxMsgSent) { + bgprintf("Unable to send RAS (0x%02x 0x%02x 0x%02x\n", ras.comp, ras.subcomp, ras.code); + rc = -EIO; + } + bgc.outboxRetry = 0; + } else { + /* Send console messages. */ + unsigned int EOL = bgc.outboxHead; + unsigned int msgLen = 0; + unsigned int len; + + /* Group lines into an outbox-sized block of lines. */ + while (EOL != bgc.outboxTail && msgLen < BG_OUTBOX_MSG_SIZE && + (len = __bgOutboxBuffFindEOL(&EOL, BG_OUTBOX_MSG_SIZE - msgLen)) > 0) { + /* Found another line. Append it to the outbox message. */ + EOL = (EOL+1) % BG_OUTBOX_BUFF_SIZE; + msgLen += len; + } + + /* Determine if there are complete lines to print or if we should print a partial line. */ + if (!msgLen) { + unsigned int bytesAvailable = EOL - bgc.outboxHead + 1; + + if (bytesAvailable == BG_OUTBOX_MSG_SIZE || bgc.outboxMsgAge++ >= BG_OUTBOX_MAX_AGE) { + /* Either we have a full outbox message or output is too old. Send it now. */ + msgLen = bytesAvailable; + } else { + rc = -EAGAIN; // wait for more output + break; + } + } + + /* Send any outbox message data. */ + if (msgLen) { + bgc.outboxMsgSent = !bluegene_writeToMailboxConsole_nonBlocking(bgc.outboxBuff+bgc.outboxHead, msgLen); + if (bgc.outboxMsgSent || bgc.outboxRetry++ > BG_OUTBOX_MAX_RETRY) { + BG_OUTBOX_HEAD_INCREMENT(msgLen); + bgc.outboxMsgAge = bgc.outboxRetry = 0; + rc = (bgc.outboxMsgSent ? rc + 1 : -EIO); + } else { + rc = -EAGAIN; + } + } + } + } + + /* If a message was sent (now or during a past call) then check to see if the message has been */ + /* taken so that we lower outbox attention ASAP. 
*/ + if (bgc.outboxMsgSent && !bluegene_testForOutboxCompletion()) + bgc.outboxMsgSent = 0; + + /* If there is something to send but the outbox wasn't ready then return -EWOULDBLOCK. */ + if (!rc && bgc.outboxHead != bgc.outboxTail) + rc = -EWOULDBLOCK; + + return rc; +} + + +/* Send any buffered messages so long as the outbox is ready. This function locks the outbox before accessing it. */ +inline int bgFlushOutboxMsgs(void) +{ + int rc; + unsigned long flags; + + spin_lock_irqsave(&bgc.outboxLock, flags); + rc = __bgFlushOutboxMsgs(); + spin_unlock_irqrestore(&bgc.outboxLock, flags); + + return rc; +} + + +/* Add a console message to the outbox buffer. */ +int bgWriteConsoleMsg(struct tty_struct* tty, + const unsigned char* msg, + int msgLen) +{ + int rc = 0; + + if (msgLen > 0) { + unsigned long flags; + + /* Lock the outbox. */ + spin_lock_irqsave(&bgc.outboxLock, flags); + + /* Copy the message to the buffer, wrapping around if necessary. */ + rc = __bgOutboxBufferAppend((char*) msg, (unsigned int) msgLen); + + /* Unlock outbox. */ + spin_unlock_irqrestore(&bgc.outboxLock, flags); + } + + return rc; +} + + +/* Add a binary RAS event to the outbox buffer. If the buffer is full this function flushes */ +/* outbox messages to free buffer space. */ +int bgWriteRasEvent(unsigned int component, + unsigned int subcomponent, + unsigned int errCode, + unsigned int data[], + unsigned int dataLen) +{ + int rc = 1; + unsigned long flags; + bg_ras ras; + + /* Lock the outbox buffer. */ + spin_lock_irqsave(&bgc.outboxLock, flags); + + /* If insufficient buffer space exists then flush outbox messages until we free enough space. */ + while (__bgOutboxBufferFree() < sizeof(ras) + 2) + __bgFlushOutboxMsgs(); + + /* Initialize the RAS structure. */ + ras.comp = component; + ras.subcomp = subcomponent; + ras.code = errCode; + ras.length = (dataLen <= sizeof(ras.data) ? 
dataLen : sizeof(ras.data)); + memcpy(ras.data, (char*) data, ras.length); + + /* Copy the RAS information to the outbox buffer. */ + bgc.outboxBuff[bgc.outboxTail] = BG_RAS_MAGIC_CHAR; + BG_OUTBOX_TAIL_INCREMENT(1); + bgc.outboxBuff[bgc.outboxTail] = BG_RAS_TYPE_BINARY; + BG_OUTBOX_TAIL_INCREMENT(1); + rc = __bgOutboxBufferAppend((unsigned char*) &ras, sizeof(ras)); + + /* Unlock the outbox buffer. */ + spin_unlock_irqrestore(&bgc.outboxLock, flags); + + return rc; +} + + +/* Add an ASCII RAS event to the outbox buffer. If the buffer is full this function flushes */ +/* outbox messages to free buffer space. */ +int bgWriteRasStr(unsigned int component, + unsigned int subcomponent, + unsigned int errCode, + char* str, + unsigned int strLen) +{ + int rc = 1; + unsigned long flags; + bg_ras ras; + + /* Lock the outbox buffer. */ + spin_lock_irqsave(&bgc.outboxLock, flags); + + /* If insufficient buffer space exists then flush outbox messages until we free enough space. */ + while (__bgOutboxBufferFree() < sizeof(ras) + 2) + __bgFlushOutboxMsgs(); + + /* Initialize the RAS structure. */ + ras.comp = component; + ras.subcomp = subcomponent; + ras.code = errCode; + if (!strLen || strLen > sizeof(ras.data)) + strLen = sizeof(ras.data)-1; + for (ras.length = 0; *str && ras.length < strLen; str++, ras.length++) + ras.data[ras.length] = *str; + ras.data[ras.length] = '\0'; + + /* Copy the RAS information to the outbox buffer. */ + bgc.outboxBuff[bgc.outboxTail] = BG_RAS_MAGIC_CHAR; + BG_OUTBOX_TAIL_INCREMENT(1); + bgc.outboxBuff[bgc.outboxTail] = BG_RAS_TYPE_ASCII; + BG_OUTBOX_TAIL_INCREMENT(1); + rc = __bgOutboxBufferAppend((unsigned char*) &ras, sizeof(ras)); + + /* Unlock the outbox buffer. 
*/ + spin_unlock_irqrestore(&bgc.outboxLock, flags); + + return rc; +} + + +static int bluegenecons_open(struct tty_struct *tty, struct file * filp) +{ + if (tty->count == 1) { + bgc.tty = tty; + tty->driver_data = &bgc; + } + + return 0; +} + +static void bluegenecons_close(struct tty_struct *tty, struct file * filp) +{ + if (tty && tty->count == 1) { + bgc.tty = NULL; + } + + return; +} + + +#define BLUEGENECONS_MAGIC_SYSRQ_KEY (15) /* ^O */ + +static void bluegenecons_rcv(char *msg, int msglen) +{ + struct tty_struct *tty; + unsigned long flags; + static int sysrq_mode; + + spin_lock_irqsave(&bgc.ttyLock, flags); + tty = bgc.tty; + if (tty) { + while (msglen) { + int i; + int count = tty_buffer_request_room(tty, msglen); + + for (i = 0; i < count; i++) { + if (sysrq_mode) { + handle_sysrq(msg[i], tty); + sysrq_mode = 0; + } else if (msg[i] == BLUEGENECONS_MAGIC_SYSRQ_KEY) + sysrq_mode = 1; + else + tty_insert_flip_char(tty, msg[i], 0); + } + msglen -= count; + msg += count; + tty_flip_buffer_push(tty); + } + } + spin_unlock_irqrestore(&bgc.ttyLock, flags); + + return; +} + + +/* + * Mailbox polling kernel thread. + * + * This thread wakes up at intervals to check for inbound mailbox messages + * and it will send waiting outbound messages if the outbound box is free. + */ +int kmboxd(void *arg) +{ + __set_current_state(TASK_RUNNING); + do { + int rc; + + /* If there is anything in the inbox read it now. */ + if (bluegene_testInboxAttention()) { + static char buffer[512]; + int len; + + /* Fetch any input */ + len = bluegene_readFromMailboxConsole(buffer, sizeof(buffer)); + if (len > 0) + bluegenecons_rcv(buffer, len); + } + + /* Flush any console output that is buffered. */ + rc = bgFlushOutboxMsgs(); + + /* If outbox buffer data was written then wake any TTY writer */ + /* that is waiting. 
*/ + if (rc > 0 && bgc.tty) { + if ((bgc.tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) + && bgc.tty->ldisc.ops->write_wakeup) + (bgc.tty->ldisc.ops->write_wakeup)(bgc.tty); + wake_up_interruptible(&bgc.tty->write_wait); + } + + wait_event_timeout(bgc.wait, 0, msecs_to_jiffies(10)); + } while (!kthread_should_stop()); + + return 0; +} + + +#ifdef CONFIG_MAGIC_SYSRQ + +extern void ctrl_alt_del(void); + +static int bluegene_do_sysrq(void* data) +{ + int key = (int) data; + static char* env[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", + "LD_LIBRARY_PATH=/lib:/usr/lib", NULL }; + + switch(key) { + case 'h' : + { + static char* argv[] = { "/etc/rc.shutdown", NULL }; + + kernel_execve(argv[0], argv, env); + printk(KERN_EMERG "Failure halting I/O node. Attempting secondary method.\n"); + ctrl_alt_del(); + break; + } + + case 'x' : + { + static char* argv[] = { "/etc/rc.reboot", NULL }; + + kernel_execve(argv[0], argv, env); + printk(KERN_EMERG "Failure rebooting I/O node.\n"); + break; + } + + default : + printk(KERN_EMERG "Unknown sysrq '%c'\n", key); + } + + return 0; +} + + +static void bluegene_handle_sysrq(int key, struct tty_struct *tty) +{ + struct task_struct* t = kthread_run(bluegene_do_sysrq, (void*) key, "Process System Request"); + + if (IS_ERR(t)) { + printk(KERN_EMERG "Failure creating sysrq '%c' thread.\n", (char) key); + bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_sysrq_thread_create_failure, + "Failure creating sysrq thread.", 0); + if (key == 'h') + ctrl_alt_del(); + } + + return; +} + +static struct sysrq_key_op bg_sysrq_halt_op = { + .handler = bluegene_handle_sysrq, + .help_msg = "Halt", + .action_msg = "Halt node" +}; + +static struct sysrq_key_op bg_sysrq_reboot_op = { + .handler = bluegene_handle_sysrq, + .help_msg = "Reboot", + .action_msg = "Reboot node" +}; +#endif + + +static struct tty_operations bgcons_ops = { + .open = bluegenecons_open, + .close = bluegenecons_close, + .write = bgWriteConsoleMsg, + .write_room = 
bgOutboxBufferFree, + .chars_in_buffer = bgOutboxBufferUsed, +}; + + +/* Read interface not defined so we just return EOF */ +static int bluegene_rasevent_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + return 0; +} + + +/* Write the event. The user provides the payload...we provide the rest. + */ +static int bluegene_rasevent_write(struct file *file, const char *buffer, + unsigned long len, void *data) +{ + bg_ras ras; + + /* Truncate the message if it is too large. */ + if (len > sizeof(ras)) + len = sizeof(ras); + else if (len < ((unsigned long) &ras.data - (unsigned long) &ras)) + return -EIO; + + if (copy_from_user(&ras, buffer, len)) + return -EFAULT; + else { + if (!data) + bgWriteRasEvent(ras.comp, ras.subcomp, ras.code, + (unsigned int*) ras.data, ras.length); + else { + /* ASCII detail data was written. */ + if (!ras.length) + ras.data[0] = '\0'; + bgWriteRasStr(ras.comp, ras.subcomp, ras.code, + ras.data, ras.length); + } + } + + return len; +} + + +static inline char* entryName(char* path) +{ + char* lastSlash = NULL; + + while (*path) { + if (*path == '/') + lastSlash = path + 1; + path++; + } + + return lastSlash; +} + + +static int __init bluegenecons_init(void) +{ + + bgc.ttyDriver = alloc_tty_driver(1); + if (!bgc.ttyDriver) { + char* msg = "Failure allocating BlueGene console driver."; + + bgprintf(msg); + bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_tty_alloc_failure, msg); + return -EIO; + } + + bgc.ttyDriver->owner = THIS_MODULE; + bgc.ttyDriver->name = "bgcons"; + bgc.ttyDriver->name_base = 1; + bgc.ttyDriver->major = BLUEGENE_MAJOR; + bgc.ttyDriver->minor_start = BLUEGENE_MINOR; + bgc.ttyDriver->type = TTY_DRIVER_TYPE_SYSTEM; + bgc.ttyDriver->init_termios = tty_std_termios; + bgc.ttyDriver->flags = TTY_DRIVER_REAL_RAW; + tty_set_operations(bgc.ttyDriver, &bgcons_ops); + + if (tty_register_driver(bgc.ttyDriver)) { + char* msg = "Failure registering BlueGene console driver"; + + 
bgprintf(msg); + bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_tty_reg_failure, msg); + return -EIO; + } + +#ifdef CONFIG_MAGIC_SYSRQ + /* Sysrq h is sent by the control system to halt an ION during free_block */ + register_sysrq_key('h', &bg_sysrq_halt_op); + + /* Sysrq x is sent by the control system when ION reboot is requested. */ + register_sysrq_key('x', &bg_sysrq_reboot_op); +#endif + + /* Kick off the kernel mailbox poll thread. */ + init_waitqueue_head(&bgc.wait); + bgc.kmboxdTask = kthread_run(kmboxd, NULL, "kmboxd"); + if (IS_ERR(bgc.kmboxdTask)) { + char* msg = "Failure creating mailbox processing thread."; + + bgprintf(msg); + bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_mbox_thread_create_failure, msg); + put_tty_driver(bgc.ttyDriver); + return -EIO; + } + + /* Create /proc RAS interfaces. */ + proc_ras = create_proc_entry(entryName(BG_RAS_FILE), S_IFREG | S_IRWXUGO, NULL); + if (proc_ras) { + proc_ras->nlink = 1; + proc_ras->read_proc = (void*) bluegene_rasevent_read; + proc_ras->write_proc = (void*) bluegene_rasevent_write; + proc_ras->data = (void*) 0; // not ASCII message + } + proc_ras_ascii = create_proc_entry(entryName(BG_RAS_ASCII_FILE), S_IFREG | S_IRWXUGO, NULL); + if (proc_ras_ascii) { + proc_ras_ascii->nlink = 1; + proc_ras_ascii->read_proc = (void*) bluegene_rasevent_read; + proc_ras_ascii->write_proc = (void*) bluegene_rasevent_write; + proc_ras_ascii->data = (void*) 1; // is ASCII message + } + + return 0; +} + +static void __exit bluegenecons_exit(void) +{ + if (proc_ras) { + remove_proc_entry(proc_ras->name, NULL); + proc_ras = NULL; + } + if (proc_ras_ascii) { + remove_proc_entry(proc_ras_ascii->name, NULL); + proc_ras_ascii = NULL; + } + + return; +} + +/* + * Console write. 
+ */ +static void bluegene_console_write(struct console *co, const char *b, unsigned count) +{ + if (count > 0) + bgWriteConsoleMsg(bgc.tty, b, count); +} + +static struct tty_driver *bluegene_console_device(struct console *c, int *ip) +{ + *ip = 0; + return bgc.ttyDriver; +} + + +static struct console bgcons = { + .name = "bgcons", + .write = bluegene_console_write, + .device = bluegene_console_device, + .flags = CON_PRINTBUFFER, + .index = 0, +}; + +int __init bluegene_console_init(void) +{ + register_console(&bgcons); + + return 0; +} + + +module_init(bluegenecons_init); +module_exit(bluegenecons_exit); +console_initcall(bluegene_console_init); diff --git a/drivers/char/bluegene_networks.c b/drivers/char/bluegene_networks.c new file mode 100644 index 00000000000000..5e06e0c4609ce3 --- /dev/null +++ b/drivers/char/bluegene_networks.c @@ -0,0 +1,202 @@ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/ctype.h> +#include <linux/types.h> +#include <linux/cdev.h> +#include <linux/semaphore.h> +#include <linux/fs.h> +#include <linux/mm.h> + +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/pgtable.h> + + +static int bgpnet_add_device(int major, int minor, const char* name, unsigned long long base); +static int bgpnet_device_open(struct inode *inode, struct file *filp); +static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *); +static int bgpnet_device_release(struct inode *inode, struct file * filp); +static int bgpnet_device_ioctl(struct inode *inode, struct file * filp, + unsigned int cmd, unsigned long arg); + + +#define BGP_COL_MAJOR_NUM 120 +#define BGP_TORUS_MAJOR_NUM 121 +#define BGP_GI_MAJOR_NUM 122 +#define BGP_COL_MINOR_NUMS 2 +#define BGP_TORUS_MINOR_NUMS 2 +#define BGP_GI_MINOR_NUMS 4 +#define _BGP_UA_COL0 (0x6) +#define _BGP_PA_COL0 (0x10000000) +#define _BGP_UA_COL1 (0x6) +#define _BGP_PA_COL1 (0x11000000) +#define _BGP_UA_TORUS0 (0x6) +#define _BGP_PA_TORUS0 (0x01140000) +#define _BGP_UA_TORUS1 (0x6) 
+#define _BGP_PA_TORUS1 (0x01150000) + +struct bgpnet_dev +{ + int major,minor; /* device major, minor */ + unsigned long long physaddr; /* physical address */ + struct task_struct* current; /* process holding device */ + int signum; /* signal to send holding process */ + wait_queue_head_t read_wq; + int read_complete; + void *regs; /* mapped regs (only used with col) */ + struct semaphore sem; /* interruptible semaphore */ + struct cdev cdev; /* container device? */ +}; + + +#define BGP_MAX_DEVICES 8 +static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES]; +static unsigned int bgpnet_num_devices = 0; + + +static struct file_operations bgpnet_device_fops = +{ + .owner= THIS_MODULE, + .open= bgpnet_device_open, + .read = NULL, + .write= NULL, + .poll= NULL, + .ioctl= bgpnet_device_ioctl, + .release= bgpnet_device_release, + .mmap= bgpnet_device_mmap, +}; + + +static int bgpnet_add_device(int major, + int minor, + const char* devname, + unsigned long long physaddr) +{ + int ret; + dev_t devno; + struct bgpnet_dev* dev = &bgpnet_devices[bgpnet_num_devices]; + + /* initialize struct */ + init_MUTEX (&dev->sem); + dev->major = major; + dev->minor = minor; + dev->physaddr = physaddr; + init_waitqueue_head(&dev->read_wq); + dev->read_complete = 0; + if (physaddr) { + dev->regs = ioremap(physaddr, 4096); + } + devno=MKDEV(major,minor); + + /* register i.e., /proc/devices */ + ret=register_chrdev_region(devno,1,(char *)devname); + + if (ret) { + printk (KERN_WARNING "bgpnet: couldn't register device (%d,%d) err=%d\n", + major,minor,ret); + return ret; + } + + /* add cdev */ + cdev_init(&dev->cdev,&bgpnet_device_fops); + dev->cdev.owner=THIS_MODULE; + dev->cdev.ops=&bgpnet_device_fops; + ret=cdev_add(&dev->cdev,devno,1); + if (ret) { + printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d), err=%d\n", + major,minor,ret); + return ret; + } + + /* signal to pass to owning process, should be altered using ioctl */ + dev->signum=-1; + + bgpnet_num_devices++; + + return 
0; +} + + +static int bgpnet_device_open (struct inode *inode, struct file *filp) +{ + struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev); + + if(down_interruptible(&dev->sem)) return -ERESTARTSYS; + up(&dev->sem); + + dev->current=current; + filp->private_data = (void*) dev; + + return 0; +} + + + +static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma) +{ + unsigned long vsize = vma->vm_end - vma->vm_start; + struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data; + int ret = -1; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO; + vma->vm_flags |= VM_RESERVED; + + if (device->physaddr != 0) + ret = remap_pfn_range(vma, + vma->vm_start, + device->physaddr >> PAGE_SHIFT, + vsize, + vma->vm_page_prot); + + if (ret) + printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n", + device->major, device->minor); + + return ret? -EAGAIN :0; +} + + +static int bgpnet_device_release (struct inode *inode, struct file * filp) +{ + struct bgpnet_dev *dev=(struct bgpnet_dev *)filp->private_data; + + /*Ensure exclusive access*/ + if (down_interruptible(&dev->sem)) return -ERESTARTSYS; + + dev->current = NULL; + up(&dev->sem); + + return 0; +} + + +static int bgpnet_device_ioctl (struct inode *inode, + struct file * filp, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + + +static int __init bgpnet_module_init(void) +{ + int rc = 0; + unsigned long long tr0, tr1, ts0, ts1; + + tr0=((unsigned long long) _BGP_UA_COL0 << 32) + _BGP_PA_COL0; + tr1=((unsigned long long) _BGP_UA_COL1 << 32) + _BGP_PA_COL1; + ts0=((unsigned long long) _BGP_UA_TORUS0 << 32) + _BGP_PA_TORUS0; + ts1=((unsigned long long) _BGP_UA_TORUS1 << 32) + _BGP_PA_TORUS1; + + bgpnet_add_device(BGP_COL_MAJOR_NUM, 0,"bgptree_vc0", tr0); + bgpnet_add_device(BGP_COL_MAJOR_NUM, 1, "bgptree_vc1", tr1); + bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 0, "bgptorus_g0", ts0); + bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 1, 
"bgptorus_g1", ts1); + + return rc; +} + + +module_init(bgpnet_module_init); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 62d732a886f199..411fd60041755a 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2628,6 +2628,166 @@ config QLGE To compile this driver as a module, choose M here: the module will be called qlge. +config BGP_COLLECTIVE + tristate "BlueGene Ethernet-on-Collective support" + default y if BGP=y + depends on BGP + help + This driver supports the BlueGene Ethernet-over-collective + controller, for running IP between IO and Compute nodes. + +config BGP_COLLECTIVE_IP_CHECKSUM + bool "Request IP-layer software checksum on the BGP collective" + default y + depends on BGP_COLLECTIVE + help + The BlueGene collective network has hardware CRC-and-retry, which is stronger than IP checksum + But you can select IP checksumming as well. + +config BGP_COLLECTIVE_NAPI + tristate "BlueGene Ethernet-on-Collective NAPI support" + default n + depends on BGP + help + This configures the BGP collective driver to use NAPI interrupt mitigation + +config BGP_DMA + tristate "BlueGene Torus DMA support" + default y if BGP=y + depends on BGP + help + This driver supports the BlueGene torus DMA unit. 
+ You will need it if you want to use BGP_TORUS + +config BGP_TORUS + tristate "BlueGene Ethernet-on-Torus support" + default y if BGP=y + depends on BGP + help + This driver supports the BlueGene Ethernet-over-torus + controller, for running IP amongst Compute nodes + +config BGP_TORUS_DIAGNOSTICS + bool "Diagnostics for BlueGene Ethernet-on-Torus" + default y if BGP=y + depends on BGP + help + This inserts diagnostics into the TCP layers, to support + optimisation of the IP-on-BlueGene-Torus code + +config BGP_FRANKENTORUS + tristate "BlueGene Ethernet-on-Torus support, vrnic-style" + default n + depends on BGP + help + This driver supports the BlueGene Ethernet-over-torus vrnic + controller, for running IP amongst Compute nodes + +config BGP_TORUS_IP_CHECKSUM + bool "Request IP-layer software checksum on the BGP torus" + default y + depends on BGP_TORUS + help + The BlueGene torus network has hardware CRC-and-retry, which is stronger than IP checksum + But you can select IP checksumming as well. + +config BGP_RECEPTION_MEMORY_FIFO_SHIFT + int "log2(BlueGene torus software reception FIFO size)" + depends on BGP + default "24" + help + FIFO should be somewhere between 64kB and 32MB + +config BGP_TORUS_ADAPTIVE_ROUTING + tristate "BlueGene Ethernet-on-Torus with adaptive routing" + default n + depends on BGP + help + Support for IP with adaptive packet routing on the torus (experimental) + +config BGP_VRNIC + tristate "BlueGene virtual RNIC support" + default m if BGP=y + depends on BGP + help + This driver supports the BlueGene virtual RNIC + controller, for running test cases against the vRNIC + +config BGP_VRNIC_START + hex "Real address start of BGP VRNIC. Linux not to inadvertently use real store in this region" + depends on BGP_VRNIC + default "0xe0000000" + +config BGP_VRNIC_SIZE + hex "Number of bytes of memory put over to BGP VRNIC. 
Linux not to inadvertently use real store in this region" + depends on BGP_VRNIC + default "0x10000000" + +config BGP_STATISTICS + tristate "BlueGene Statistics support" + default y if BGP=y + depends on BGP + help + This driver supports gathering of statistics related to + BlueGene/P hardware + + +config BGP_E10000 + tristate "BlueGene on-chip Ethernet support" + default Y if BGP=y + depends on BGP + help + This driver supports the BlueGene 10Gb on-chip Ethernet + controller. + +config BGP_E10000_RXB + int "Total size in bytes of receive buffers (1MB maximum)" + depends on BGP_E10000 + default "1048576" + +config BGP_E10000_TXB + int "Number of transmit buffers" + depends on BGP_E10000 + default "4096" + +config BGP_E10000_IP_CHECKSUM + bool "Enable HW checksum for TCP/UDP IPv4 traffic" + depends on BGP_E10000 + default y + +config BGP_E10000_NAPI + bool "Enable 'new API' network interface" + depends on BGP_E10000 + default n + +config BGP_E10000_EMAC_LOOPBACK + bool "Enable MAC loopback mode" + depends on BGP_E10000 + help + This ties the output path directly to the input path at the MAC level. + default n + +config BGP_E10000_PHY_LOOPBACK + bool "Enable PHY loopback mode" + depends on BGP_E10000 + help + This ties the output path directly to the input path in the PHY. + default n + +config BGP_E10000_DBG + bool "Debug enablement" + depends on BGP_E10000 + help + This enables debug output. + default n + +config BGP_E10000_DBG_LEVEL + int "Debug level" + depends on BGP_E10000_DBG + help + This sets the amount of debug output. + default 57 + source "drivers/net/sfc/Kconfig" source "drivers/net/benet/Kconfig" @@ -3092,6 +3252,23 @@ config NETPOLL_TRAP config NET_POLL_CONTROLLER def_bool NETPOLL +config TCP_HIATUS_COUNTS + bool "TCP output hiatus counts" + default n + help + This option counts the number of times that TCP output is held back + by reason (e.g. 'congestion window filled'). 
It is useful if you are + trying to exploit fast networks, to help pin down what is limiting + the transfer rate. + +config TCP_CONGESTION_OVERRIDES + bool "TCP output congestion overrides" + default n + help + This option places controls in sysfs so that TCP congestion parameters + can be overridden system-wide; e.g. turning Nagle off + + config VIRTIO_NET tristate "Virtio network driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 471baaff229ff4..31b9f4376d18c8 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -5,6 +5,13 @@ obj-$(CONFIG_E1000) += e1000/ obj-$(CONFIG_E1000E) += e1000e/ obj-$(CONFIG_IBM_NEW_EMAC) += ibm_newemac/ +obj-$(CONFIG_BGP_E10000) += bgp_e10000/ +obj-$(CONFIG_BGP_COLLECTIVE) += bgp_collective/ +obj-$(CONFIG_BGP_TORUS) += bgp_torus/ +obj-$(CONFIG_BGP_FRANKENTORUS) += bgp_frankentorus/ +obj-$(CONFIG_BGP_VRNIC) += bgp_vrnic/ +obj-$(CONFIG_BGP_STATISTICS) += bgp_statistics/ +# obj-$(CONFIG_BLUEGENE_SOCKETS) += bgp_sockets/ obj-$(CONFIG_IGB) += igb/ obj-$(CONFIG_IXGBE) += ixgbe/ obj-$(CONFIG_IXGB) += ixgb/ diff --git a/drivers/net/bgp_collective/Makefile b/drivers/net/bgp_collective/Makefile new file mode 100644 index 00000000000000..29fbe0adf6b54d --- /dev/null +++ b/drivers/net/bgp_collective/Makefile @@ -0,0 +1,7 @@ +# Makefile for BlueGene collective and torus driver + +EXTRA_CFLAGS += -I$(BGPHOME)/bgp/arch/include -Iarch/powerpc/syslib/bgdd/ -Iarch/ppc/syslib/bgdd/ -g -dA -D__LINUX_KERNEL__ + +bgp_collective-y := bgcol.o bgnet.o + +obj-$(CONFIG_BGP_COLLECTIVE) += bgp_collective.o diff --git a/drivers/net/bgp_collective/bgcol.c b/drivers/net/bgp_collective/bgcol.c new file mode 100644 index 00000000000000..7e10a137839ed3 --- /dev/null +++ b/drivers/net/bgp_collective/bgcol.c @@ -0,0 +1,3330 @@ +/********************************************************************* + * + * Description: Blue Gene low-level driver for collective network + * + * Copyright (c) 2007, 
2010 International Business Machines + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Authors: + * Chris Ward <tjcw@uk.ibm.com> + * Volkmar Uhlig <vuhlig@us.ibm.com> + * Andrew Tauferner <ataufer@us.ibm.com> + * + * The protocol implemented here will send a 'jumbo' (9000 byte) frame + * in 38 packets, i.e. 240 bytes payload + 16 bytes link header per packet. + * The measured throughput was 4325 Mbit/sec on one IO link + * + * It is logically possible to send a 'jumbo' frame in 36 packets; to + * do this you need to pack 255 bytes of payload + 1 byte of link + * header per packet (you need to at least indicate which node has sent + * the packet); you probably want to do this by 'trampling' the first + * byte of each packet, sending a 'correction' byte sequence at the + * end of the frame, and having the receiver demultiplex and correct + * the frames. + * This should achieve 4565 Mbit/sec + * + * If you were to drive the link with an MTU of close to 65535, you + * could send a 65270-byte frame in 256 packets, which should achieve + * 4655 Mbit/sec. 
+ * + ********************************************************************/ + +#include <linux/kernel.h> +#include <linux/ctype.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/if_arp.h> +#include <net/arp.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/prom.h> + + +#include "bglink.h" +#include "bgcol.h" +#include "bgnet.h" +#include "bgp_dcr.h" +#include "ppc450.h" + +#include <asm/bluegene.h> + +#define DRV_NAME "bgcol" +#define DRV_VERSION "1.0" +#define DRV_DESC "IBM Blue Gene Collective Driver" + +MODULE_DESCRIPTION(DRV_DESC); +MODULE_AUTHOR("IBM"); +MODULE_LICENSE("GPL"); + +/* SA_ONSTACK is deprecated, but its replacement has not made it into MCP yet. Compatibility ... */ +#if !defined(IRQF_DISABLED) +#define IRQF_DISABLED SA_ONSTACK +#endif + +/* configuration selector macros */ +#define COLLECTIVE_RECEIVE_WITH_SLIH +/* #define COLLECTIVE_DELIVER_VIA_TASKLET */ +/* #define COLLECTIVE_BREAK_ON_FRAME */ +#define COLLECTIVE_TRANSMIT_WITH_SLIH +#define COLLECTIVE_TRANSMIT_WITH_FLIH +#define COLLECTIVE_XMITTER_FREES +#define COLLECTIVE_DUPLEX_SLIH +#define COLLECTIVE_ONEPASS_TXRX +#define BGP_COL_STATUS_VISIBILITY + + +extern void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ; + +/* For diagnosis of certain link sequencing problems, it can be useful to keep a trail of */ +/* recently-arrived link headers. 
Set this macro if you want a trail kept */ +/* #define KEEP_LNKHDR_TRAIL */ +enum { + k_lnkhdr_trail_display_length = 50, /* Link header amount of trail to display */ + k_lnkhdr_trail_length = 64, /* Link header ring buffer length, next power of 2 above k_lnkhdr_trail_display_length. */ + k_lnhhdr_ffdc_limit = 20 /* First-failure-data-capture limit, we want to catch first failures and not saturate the logging system */ +}; + +/* For diagnostics, track the last thing that we knew happened to the bgcol in interrupt mode */ +enum { + k_bgcolaction_none , + k_bgcolaction_xmit , + k_bgcolaction_xmit_enable , + k_bgcolaction_xmit_irq , + k_bgcolaction_xmit_irq_disable + +}; + +struct bglink_proto * proto_array[k_link_protocol_limit] ; + +/* static int bgcolaction ; */ + +extern int e10000_diag_count ; + + +/* #define CONFIG_BLUEGENE_COLLECTIVE_TRACE */ + +/* #define REQUIRE_TRACE */ + +#include <linux/KernelFxLog.h> + +#include "../bgp_network/bgp_net_traceflags.h" + +/* #if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) */ +/* static int bgcol_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */ +int bgcol_debug_tracemask = k_t_init | k_t_request | k_t_protocol ; +/* int bgcol_debug_tracemask = 0xffffffff ; */ +/* #endif */ + +/* Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */ +#define COMPILED_TRACEMASK (0xffffffff-k_t_detail-k_t_fifocontents) +/* #define COMPILED_TRACEMASK (k_t_error) */ + +#define XTRACEN(i,x...) +#if defined(REQUIRE_TRACE) +#define TRACE(x...) KernelFxLog(1,x) +#define TRACE1(x...) KernelFxLog(1,x) +#define TRACE2(x...) KernelFxLog(1,x) +#define TRACEN(i,x...) KernelFxLog(1,x) +#define TRACED(x...) KernelFxLog(1,x) +#define TRACES(x...) KernelFxLog(1,x) +#elif defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) +#define TRACE(x...) KernelFxLog(bgcol_debug_tracemask & k_t_general,x) +#define TRACE1(x...) KernelFxLog(bgcol_debug_tracemask & k_t_lowvol,x) +#define TRACE2(x...) 
KernelFxLog(bgcol_debug_tracemask & k_t_detail,x) +#define TRACEN(i,x...) KernelFxLog(bgcol_debug_tracemask & (COMPILED_TRACEMASK & (i)),x) +#define TRACED(x...) KernelFxLog(1,x) +#define TRACES(x...) KernelFxLog(1,x) +#else +#define TRACE(x...) +#define TRACE1(x...) +#define TRACE2(x...) +#define TRACEN(i,x...) +#define TRACED(x...) +#define TRACES(x...) +#endif + +#define _BGP_DCR_COL 0 + +#define FRAGMENT_TIMEOUT (HZ/10) + +#define COL_LNKHDRLEN (sizeof(struct bglink_hdr_col)) +#define COL_FRAGPAYLOAD (COL_PAYLOAD - COL_LNKHDRLEN) +#define COL_SKB_ALIGN 16 + + +#define BGP_COL_MAJOR_NUM 120 +#define BGP_TORUS_MAJOR_NUM 121 +#define BGP_GI_MAJOR_NUM 122 +#define BGP_COL_MINOR_NUMS 2 +#define BGP_TORUS_MINOR_NUMS 2 +#define BGP_GI_MINOR_NUMS 4 +#define _BGP_UA_COL0 (0x6) +#define _BGP_PA_COL0 (0x10000000) +#define _BGP_UA_COL1 (0x6) +#define _BGP_PA_COL1 (0x11000000) +#define _BGP_UA_TORUS0 (0x6) +#define _BGP_PA_TORUS0 (0x01140000) +#define _BGP_UA_TORUS1 (0x6) +#define _BGP_PA_TORUS1 (0x01150000) + +/* + * 'Oversized' skbuffs are an attempt to increase throughput on the collective interface by arranging for + * 2 cores to work together on pulling data and distributing it. See commentary in bgnet.c as to what needs + * to be done to get it to work. + * Having an skbuff at 64K rather than 9K (to match etherhet 'jumbo' frames) doesn't really cost much memory; + * we are only likely to have a few MB of skbuffs in each IO node, and less in each compute node. 
+ */ +enum { + k_use_plentiful_skb = 1 , /* Whether to use an oversized sk_buff to receive in to */ + k_plentiful_skb_size = 256*COL_FRAGPAYLOAD +}; + +static void bgcol_prefill(struct sk_buff_head * skb_list, unsigned int count) +{ + unsigned int x ; + for(x=0;x<count;x+=1) + { + struct sk_buff *skb=alloc_skb(k_plentiful_skb_size,GFP_KERNEL) ; + if(skb) + { + skb_queue_tail(skb_list,skb) ; + } + + } +} + +static struct sk_buff * take_skb_from_list_for_filling(struct bg_col *col) +{ + return skb_dequeue (&col->skb_list_for_filling) ; +} + +static void replenish_list_for_filling(struct bg_col *col) +{ + struct sk_buff *skb=alloc_skb(k_plentiful_skb_size,GFP_KERNEL) ; + if(skb) + { + skb_queue_tail(&col->skb_list_for_filling,skb) ; + } + +} +/* int bgcol_diagnostic_use_napi ; */ +/* + * device management + */ + +#define BGP_MAX_DEVICES 8 +static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES]; +/* static unsigned int bgpnet_num_devices = 0; */ + + +static struct proc_dir_entry* bgpnetDir; +/* static struct proc_dir_entry* barrierEntry; */ +static struct proc_dir_entry* statisticsEntry; +static struct proc_dir_entry* statusEntry; +/* static struct proc_dir_entry* tracemaskEntry; */ +struct bg_col static_col; + +static struct bg_col *__bgcol = &static_col ; + +/* static int bgpnet_add_device(int major, int minor, const char* name, */ +/* unsigned long long base, int irq, */ +/* irqreturn_t (*irq_handler)(int, void*)); */ +/* static int bgpnet_device_open(struct inode *inode, struct file *filp); */ +/* static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *); */ +/* static int bgpnet_device_release(struct inode *inode, struct file * filp); */ +/* static int bgpnet_device_ioctl(struct inode *inode, struct file * filp, */ +/* unsigned int cmd, unsigned long arg); */ +/* static ssize_t bgpnet_device_read(struct file *filp, char __user *buf, size_t count, */ +/* loff_t *f_pos); */ +/* static unsigned int bgpnet_device_poll(struct file *file, poll_table * 
wait); */ + + +/* static struct file_operations bgpnet_device_fops = */ +/* { */ +/* .owner= THIS_MODULE, */ +/* .open= bgpnet_device_open, */ +/* .read= bgpnet_device_read, */ +/* .write= NULL, */ +/* .poll= bgpnet_device_poll, */ +/* .ioctl= bgpnet_device_ioctl, */ +/* .release= bgpnet_device_release, */ +/* .mmap= bgpnet_device_mmap, */ +/* }; */ + +struct bg_col *bgcol_get_dev() +{ + return __bgcol; +} + +unsigned int bgcol_get_nodeid(struct bg_col* col) +{ + return col->nodeid; +} + +/********************************************************************** + * IRQs + **********************************************************************/ + +/* static irqreturn_t bgcol_unhandled_interrupt(int irq, void *dev, struct pt_regs* regs) */ +/* { */ +/* panic("col: unhandled irq %d\n", irq); */ +/* } */ + +static irqreturn_t bgcol_duplex_interrupt(int irq, void *dev); + +#define IRQ_IDX_INJECT 0 +#define IRQ_IDX_RECEIVE 1 + +#define DEF_IRQ(_irq, _name, _handler) \ +{ .irq = _irq, .name = _name, .handler = _handler } + +#define BG_COL_IRQ_INJ 180 +#define BG_COL_IRQ_RCV 181 + +#define BG_COL_IRQ_GROUP 5 +#define BG_COL_IRQ_INJ_GINT 20 +#define BG_COL_IRQ_RCV_GINT 21 + +/* Linux 'virtual interrupt' numbers corresponding to how the collective is wired to the BIC */ +enum { + k_inject_irq = (5*32 + 20) + 32 , + k_receive_irq = (5*32 + 21) + 32 +} ; + +static struct { + unsigned irq; + char *name; + irqreturn_t (*handler)(int irq, void *dev); +} bgcol_irqs [] = { + DEF_IRQ(k_inject_irq, "Tree inject", bgcol_duplex_interrupt), /* IRQ_IDX_INJECT */ + DEF_IRQ(k_receive_irq, "Tree receive", bgcol_duplex_interrupt), /* IRQ_IDX_RECEIVE */ +#if 0 + DEF_IRQ("Tree VC0", bgcol_receive_interrupt), + DEF_IRQ("Tree VC1", bgcol_receive_interrupt), + DEF_IRQ("Tree CRNI timeout", bgcol_unhandled_interrupt), + DEF_IRQ("Tree no-target", bgcol_unhandled_interrupt), + DEF_IRQ("Tree ALU overflow", bgcol_unhandled_interrupt), + DEF_IRQ("Tree local client inject", bgcol_unhandled_interrupt), + 
DEF_IRQ("Tree local client receive", bgcol_unhandled_interrupt), + DEF_IRQ("Tree write send CH0", bgcol_unhandled_interrupt), + DEF_IRQ("Tree ECC send CH0", bgcol_unhandled_interrupt), + DEF_IRQ("Tree link CRC send CH0", bgcol_unhandled_interrupt), + DEF_IRQ("Tree write send CH1", bgcol_unhandled_interrupt), + DEF_IRQ("Tree ECC send CH1", bgcol_unhandled_interrupt), + DEF_IRQ("Tree link CRC send CH1", bgcol_unhandled_interrupt), + DEF_IRQ("Tree write send CH2", bgcol_unhandled_interrupt), + DEF_IRQ("Tree ECC send CH2", bgcol_unhandled_interrupt), + DEF_IRQ("Tree link CRC send CH2", bgcol_unhandled_interrupt), + DEF_IRQ("Tree ECC rcv CH0", bgcol_unhandled_interrupt), + DEF_IRQ("Tree link CRC rcv CH0", bgcol_unhandled_interrupt), + DEF_IRQ("Tree ECC rcv CH1", bgcol_unhandled_interrupt), + DEF_IRQ("Tree link CRC rcv CH1", bgcol_unhandled_interrupt), + DEF_IRQ("Tree ECC rcv CH2", bgcol_unhandled_interrupt), + DEF_IRQ("Tree link CRC rcv CH2", bgcol_unhandled_interrupt), +#endif + { -1,NULL, NULL } +}; + + +/********************************************************************** + * Debug + **********************************************************************/ + +static inline void dump_skb(struct sk_buff *skb) +{ + TRACEN(k_t_general,"sk_buff at %p, data=%p, len=%d", skb,skb->data, skb->len) ; +#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) + if( bgcol_debug_tracemask & k_t_detail ) + { + int i; + for (i = 0; i < skb->len / 4 + 1; i++) + printk("%08x%c", ((u32*)skb->data)[i], (i + 1) % 8 ? ' ' : '\n'); + printk("\n"); + } +#endif +} + +static inline void dump_skb_partial(struct sk_buff *skb, int maxlength) +{ + TRACEN(k_t_general,"sk_buff at %p, data=%p, len=%d", skb,skb->data, skb->len) ; +#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) + if( bgcol_debug_tracemask & k_t_detail ) + { + int j = (maxlength > skb->len) ? skb->len : maxlength ; + int i; + for (i = 0; i < j / 4 + 1; i++) + printk("%08x%c", ((u32*)skb->data)[i], (i + 1) % 8 ? 
' ' : '\n'); + printk("\n"); + } +#endif +} + +static inline void dump_bgcol_packet(struct bglink_hdr_col *lnkhdr, void * payload) + { + TRACEN(k_t_general,"bgcol_packet: hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", + lnkhdr->conn_id, lnkhdr->this_pkt, lnkhdr->total_pkt, lnkhdr->dst_key, lnkhdr->src_key); +#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) + if( bgcol_debug_tracemask & k_t_detail ) + { + int i ; + int * pi = (int *) payload ; + for( i=0; i<COL_FRAGPAYLOAD/sizeof(int); i += 8) + { + TRACEN(k_t_bgcolpkt," %04x %08x %08x %08x %08x %08x %08x %08x %08x", + 4*i, pi[i+0], pi[i+1], pi[i+2], pi[i+3], pi[i+4], pi[i+5], pi[i+6], pi[i+7] + ) ; + } + } +#endif + } + +/* Delivery of skbuffs to linux networking layer */ +/* Deliver an 'sk_buff' via a work queue, so that 'this' core can spend its time draining the collective hardware */ +struct bgcol_workqueue_item +{ + struct work_struct work ; + struct bglink_proto *proto ; + unsigned int src_key ; +}; +static void bgcol_workqueue_actor(struct work_struct * work) +{ + char * cb = (char *) work ; + struct sk_buff *skb = (struct sk_buff *) (cb - offsetof(struct sk_buff, cb)) ; + struct bgcol_workqueue_item * bgcol_work =(struct bgcol_workqueue_item *) work ; + TRACEN(k_t_napi,"(>) work=%p skb=%p", work, skb) ; + bgcol_work->proto->col_rcv_trimmed(&static_col,skb,bgcol_work->proto,bgcol_work->src_key) ; + replenish_list_for_filling(&static_col) ; + TRACEN(k_t_napi,"(<)") ; +} +static void bgcol_deliver_via_workqueue(struct sk_buff *skb, struct bglink_hdr_col *lnkhdr, struct bglink_proto *proto ) +{ + struct bgcol_workqueue_item * bgcol_work = (struct bgcol_workqueue_item *)(skb->cb) ; + int rc ; + TRACEN(k_t_napi,"(>)skb=%p", skb) ; + __skb_pull(skb, lnkhdr->opt.opt_net.pad_head); + __skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail); + INIT_WORK(&bgcol_work->work,bgcol_workqueue_actor) ; + bgcol_work->proto = proto ; + bgcol_work->src_key = lnkhdr->src_key ; + 
rc=schedule_work_on(k_WorkqueueDeliveryCPU,&bgcol_work->work) ; + TRACEN(k_t_napi,"(<) rc=%d",rc) ; +} +/********************************************************************** + * Interrupt handling + **********************************************************************/ + +/* Enable receive interrupts */ +void bgcol_enable_interrupts(struct bg_col *bgcol) +{ + unsigned rec_enable; + unsigned long flags ; + TRACE( "(>) bgcol=%p", bgcol); + printk(KERN_NOTICE "enable ints \n"); + + spin_lock_irqsave(&bgcol->lock, flags); + + /* set watermarks */ + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG0, _TR_GLOB_VCFG_RWM(0) ); + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG1, _TR_GLOB_VCFG_RWM(0) ); + /* set watermarks */ + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG0, _TR_GLOB_VCFG_IWM(4) ); /* let transmit fifos get half empty before interrupting */ + + rec_enable = mfdcrx(bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN); + rec_enable |= COL_IRQMASK_REC; + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, rec_enable ); + + /* clear exception flags */ + mfdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXF ); + mfdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXF ); + + spin_unlock_irqrestore(&bgcol->lock, flags); + TRACE( "(<) rec_enable:0x%08x", rec_enable); +} + +static inline void bgcol_enable_interrupts_rcv(struct bg_col *bgcol) +{ + unsigned rec_enable; + TRACE( "(>) bgcol=%p", bgcol); + rec_enable = COL_IRQMASK_REC ; + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, rec_enable ); + + TRACE( "(<) rec_enable:0x%08x", rec_enable); +} + +static inline void bgcol_enable_interrupts_xmit(struct bg_col *bgcol) +{ + TRACE( "bgcol=%p", bgcol); + + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, (_TR_INJ_PIX_ENABLE | _TR_INJ_PIX_WM0 ) ); + +} + + +static inline void bgcol_disable_interrupts(struct bg_col *bgcol) +{ + TRACEN(k_t_irqflow,"bgcol=%p", bgcol); + + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, _TR_INJ_PIX_ENABLE ); + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, 0 ); + +} + +static 
inline void bgcol_disable_interrupts_rcv(struct bg_col *bgcol) +{ + TRACEN(k_t_irqflow,"bgcol=%p", bgcol); + + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, 0 ); + +} + +static inline void bgcol_disable_interrupts_xmit(struct bg_col *bgcol) +{ + TRACEN(k_t_irqflow, "bgcol=%p", bgcol); + + mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, _TR_INJ_PIX_ENABLE ); +} +void bgcol_enable_rcv_wm_interrupt(struct bgcol_channel* chn) +{ + unsigned long flags; + unsigned long prxen; + + spin_lock_irqsave(&chn->col->lock, flags); + chn->irq_rcv_pending_mask = COL_IRQ_RCV_PENDING_MASK(chn->idx); + prxen = mfdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN); + if (chn->idx) + mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, prxen | _TR_REC_PRX_WM1); + else + mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, prxen | _TR_REC_PRX_WM0); + spin_unlock_irqrestore(&chn->col->lock, flags); + + return; +} +static void inj_timeout(unsigned long colArg) +{ + printk(KERN_INFO "bgcol: inject fifo timed out!\n"); +} + +void bgcol_set_mtu(struct bg_col *bgcol, unsigned int mtu) + { + unsigned int max_packets_per_frame=(mtu+COL_FRAGPAYLOAD-1) / COL_FRAGPAYLOAD ; + bgcol->max_packets_per_frame = max_packets_per_frame ; + bgcol->mtu = max_packets_per_frame * COL_FRAGPAYLOAD + COL_SKB_ALIGN ; + } + +/* Inject a 16-byte header and a COL_FRAGPAYLOAD-byte payload */ +static inline void bgcol_payload_inject(void *port, void* first_quad, void *remaining_quads) + { +/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */ +/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */ + asm volatile( + "lfpdx 0,0,%[first_quad] \n\t" /* F0=Q0 load */ + "li 3,16 \n\t" /* Indexing values */ + "lfpdx 1,0,%[remaining_quads] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li 4,32 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q2 load */ + "li 3,48 \n\t" /* Indexing values */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q3 load */ + "li 4,64 \n\t" /* Indexing values */ + "stfpdx 0,0,%[port] \n\t" /* Q0 
store to TR0_DI */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q4 load */ + "li 3,80 \n\t" /* Indexing values */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q5 load */ + "li 4,96 \n\t" /* Indexing values */ + "lfpdx 6,3,%[remaining_quads] \n\t" /* F6=Q6 load */ + "li 3,112 \n\t" /* Indexing values */ + "stfpdx 1,0,%[port] \n\t" /* Q1 store */ + "stfpdx 2,0,%[port] \n\t" /* Q2 store */ + "stfpdx 3,0,%[port] \n\t" /* Q3 store */ + "lfpdx 7,4,%[remaining_quads] \n\t" /* F7=Q7 load */ + "li 4,128 \n\t" /* Indexing values */ + "lfpdx 8,3,%[remaining_quads] \n\t" /* F8=Q8 load */ + "li 3,144 \n\t" /* Indexing values */ + "lfpdx 9,4,%[remaining_quads] \n\t" /* F9=Q9 load */ + "li 4,160 \n\t" /* Indexing values */ + "stfpdx 4,0,%[port] \n\t" /* Q4 store */ + "stfpdx 5,0,%[port] \n\t" /* Q5 store */ + "stfpdx 6,0,%[port] \n\t" /* Q6 store */ + "lfpdx 0,3,%[remaining_quads] \n\t" /* F0=Q10 load */ + "li 3,176 \n\t" /* Indexing values */ + "lfpdx 1,4,%[remaining_quads] \n\t" /* F1=Q11 load */ + "li 4,192 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q12 load */ + "li 3,208 \n\t" /* Indexing values */ + "stfpdx 7,0,%[port] \n\t" /* Q7 store */ + "stfpdx 8,0,%[port] \n\t" /* Q8 store */ + "stfpdx 9,0,%[port] \n\t" /* Q9 store */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q13 load */ + "li 4,224 \n\t" /* Indexing values */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q14 load */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q15 load */ + "stfpdx 0,0,%[port] \n\t" /* Q10 store */ + "stfpdx 1,0,%[port] \n\t" /* Q11 store */ + "stfpdx 2,0,%[port] \n\t" /* Q12 store */ + "stfpdx 3,0,%[port] \n\t" /* Q13 store */ + "stfpdx 4,0,%[port] \n\t" /* Q14 store */ + "stfpdx 5,0,%[port] \n\t" /* Q15 store */ + : + : [first_quad] "b" (first_quad) , /* Inputs */ + [remaining_quads] "b" (remaining_quads), + [port] "b" (port) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", + "r3" , "r4" ); + } +/* Inject a 16-byte header and a 
COL_FRAGPAYLOAD-byte payload */ +static inline void bgcol_payload_inject2(void *port, double* first_quad_0, double* first_quad_1, void *remaining_quads) + { +/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */ +/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */ + asm volatile( + "lfdx 0,0,%[first_quad_0] \n\t" /* F0=Q0 load */ + "lfsdx 0,0,%[first_quad_1] \n\t" /* F0=Q0 load */ + "li 3,16 \n\t" /* Indexing values */ + "lfpdx 1,0,%[remaining_quads] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li 4,32 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q2 load */ + "li 3,48 \n\t" /* Indexing values */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q3 load */ + "li 4,64 \n\t" /* Indexing values */ + "stfpdx 0,0,%[port] \n\t" /* Q0 store to TR0_DI */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q4 load */ + "li 3,80 \n\t" /* Indexing values */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q5 load */ + "li 4,96 \n\t" /* Indexing values */ + "lfpdx 6,3,%[remaining_quads] \n\t" /* F6=Q6 load */ + "li 3,112 \n\t" /* Indexing values */ + "stfpdx 1,0,%[port] \n\t" /* Q1 store */ + "stfpdx 2,0,%[port] \n\t" /* Q2 store */ + "stfpdx 3,0,%[port] \n\t" /* Q3 store */ + "lfpdx 7,4,%[remaining_quads] \n\t" /* F7=Q7 load */ + "li 4,128 \n\t" /* Indexing values */ + "lfpdx 8,3,%[remaining_quads] \n\t" /* F8=Q8 load */ + "li 3,144 \n\t" /* Indexing values */ + "lfpdx 9,4,%[remaining_quads] \n\t" /* F9=Q9 load */ + "li 4,160 \n\t" /* Indexing values */ + "stfpdx 4,0,%[port] \n\t" /* Q4 store */ + "stfpdx 5,0,%[port] \n\t" /* Q5 store */ + "stfpdx 6,0,%[port] \n\t" /* Q6 store */ + "lfpdx 0,3,%[remaining_quads] \n\t" /* F0=Q10 load */ + "li 3,176 \n\t" /* Indexing values */ + "lfpdx 1,4,%[remaining_quads] \n\t" /* F1=Q11 load */ + "li 4,192 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q12 load */ + "li 3,208 \n\t" /* Indexing values */ + "stfpdx 7,0,%[port] \n\t" /* Q7 store */ + "stfpdx 8,0,%[port] \n\t" /* Q8 store */ + "stfpdx 9,0,%[port] 
\n\t" /* Q9 store */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q13 load */ + "li 4,224 \n\t" /* Indexing values */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q14 load */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q15 load */ + "stfpdx 0,0,%[port] \n\t" /* Q10 store */ + "stfpdx 1,0,%[port] \n\t" /* Q11 store */ + "stfpdx 2,0,%[port] \n\t" /* Q12 store */ + "stfpdx 3,0,%[port] \n\t" /* Q13 store */ + "stfpdx 4,0,%[port] \n\t" /* Q14 store */ + "stfpdx 5,0,%[port] \n\t" /* Q15 store */ + : + : [first_quad_0] "b" (first_quad_0) , /* Inputs */ + [first_quad_1] "b" (first_quad_1) , /* Inputs */ + [remaining_quads] "b" (remaining_quads), + [port] "b" (port) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", + "r3" , "r4" ); + } +/* load a bgcol payload's worth from memory into registers */ +static inline void bgcol_payload_inject_load(void* first_quad, void *remaining_quads) + { +/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */ +/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */ + asm volatile( + "lfpdx 0,0,%[first_quad] \n\t" /* F0=Q0 load */ + "li 3,16 \n\t" /* Indexing values */ + "lfpdx 1,0,%[remaining_quads] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li 4,32 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q2 load */ + "li 3,48 \n\t" /* Indexing values */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q3 load */ + "li 4,64 \n\t" /* Indexing values */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q4 load */ + "li 3,80 \n\t" /* Indexing values */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q5 load */ + "li 4,96 \n\t" /* Indexing values */ + "lfpdx 6,3,%[remaining_quads] \n\t" /* F6=Q6 load */ + "li 3,112 \n\t" /* Indexing values */ + "lfpdx 7,4,%[remaining_quads] \n\t" /* F7=Q7 load */ + "li 4,128 \n\t" /* Indexing values */ + "lfpdx 8,3,%[remaining_quads] \n\t" /* F8=Q8 load */ + "li 3,144 \n\t" /* Indexing values */ + "lfpdx 9,4,%[remaining_quads] \n\t" /* F9=Q9 load */ + "li 4,160 \n\t" /* 
Indexing values */ + "lfpdx 10,3,%[remaining_quads] \n\t" /* F0=Q10 load */ + "li 3,176 \n\t" /* Indexing values */ + "lfpdx 11,4,%[remaining_quads] \n\t" /* F1=Q11 load */ + "li 4,192 \n\t" /* Indexing values */ + "lfpdx 12,3,%[remaining_quads] \n\t" /* F2=Q12 load */ + "li 3,208 \n\t" /* Indexing values */ + "lfpdx 13,4,%[remaining_quads] \n\t" /* F3=Q13 load */ + "li 4,224 \n\t" /* Indexing values */ + "lfpdx 14,3,%[remaining_quads] \n\t" /* F4=Q14 load */ + "lfpdx 15,4,%[remaining_quads] \n\t" /* F5=Q15 load */ + : + : [first_quad] "b" (first_quad) , /* Inputs */ + [remaining_quads] "b" (remaining_quads) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15","r3" , "r4" ); + } +static inline void bgcol_payload_inject_load2(double* first_quad_0, double* first_quad_1, void *remaining_quads) + { +/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */ +/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */ + asm volatile( + "lfdx 0,0,%[first_quad_0] \n\t" /* F0=Q0 load */ + "lfsdx 0,0,%[first_quad_1] \n\t" /* F0=Q0 load */ + "li 3,16 \n\t" /* Indexing values */ + "lfpdx 1,0,%[remaining_quads] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li 4,32 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q2 load */ + "li 3,48 \n\t" /* Indexing values */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q3 load */ + "li 4,64 \n\t" /* Indexing values */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q4 load */ + "li 3,80 \n\t" /* Indexing values */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q5 load */ + "li 4,96 \n\t" /* Indexing values */ + "lfpdx 6,3,%[remaining_quads] \n\t" /* F6=Q6 load */ + "li 3,112 \n\t" /* Indexing values */ + "lfpdx 7,4,%[remaining_quads] \n\t" /* F7=Q7 load */ + "li 4,128 \n\t" /* Indexing values */ + "lfpdx 8,3,%[remaining_quads] \n\t" /* F8=Q8 load */ + "li 3,144 \n\t" /* Indexing values */ + "lfpdx 9,4,%[remaining_quads] \n\t" /* F9=Q9 load */ + "li 
4,160 \n\t" /* Indexing values */ + "lfpdx 10,3,%[remaining_quads] \n\t" /* F0=Q10 load */ + "li 3,176 \n\t" /* Indexing values */ + "lfpdx 11,4,%[remaining_quads] \n\t" /* F1=Q11 load */ + "li 4,192 \n\t" /* Indexing values */ + "lfpdx 12,3,%[remaining_quads] \n\t" /* F2=Q12 load */ + "li 3,208 \n\t" /* Indexing values */ + "lfpdx 13,4,%[remaining_quads] \n\t" /* F3=Q13 load */ + "li 4,224 \n\t" /* Indexing values */ + "lfpdx 14,3,%[remaining_quads] \n\t" /* F4=Q14 load */ + "lfpdx 15,4,%[remaining_quads] \n\t" /* F5=Q15 load */ + : + : [first_quad_0] "b" (first_quad_0) , /* Inputs */ + [first_quad_1] "b" (first_quad_1) , /* Inputs */ + [remaining_quads] "b" (remaining_quads) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15","r3" , "r4" ); + } +static inline void bgcol_payload_inject_load2partial(double* first_quad_0, double* first_quad_1, void *remaining_quads, int quadcount ) + { +/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */ +/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */ + asm volatile( + "mtctr %[quadcount] \n\t" + "lfdx 0,0,%[first_quad_0] \n\t" /* F0=Q0 load */ + "lfsdx 0,0,%[first_quad_1] \n\t" /* F0=Q0 load */ + "li 3,16 \n\t" /* Indexing values */ + "lfpdx 1,0,%[remaining_quads] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "bdz 1 \n\t" /* Skip out if done */ + "li 4,32 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q2 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 3,48 \n\t" /* Indexing values */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q3 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 4,64 \n\t" /* Indexing values */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q4 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 3,80 \n\t" /* Indexing values */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q5 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 4,96 \n\t" /* Indexing values */ + "lfpdx 6,3,%[remaining_quads] 
\n\t" /* F6=Q6 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 3,112 \n\t" /* Indexing values */ + "lfpdx 7,4,%[remaining_quads] \n\t" /* F7=Q7 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 4,128 \n\t" /* Indexing values */ + "lfpdx 8,3,%[remaining_quads] \n\t" /* F8=Q8 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 3,144 \n\t" /* Indexing values */ + "lfpdx 9,4,%[remaining_quads] \n\t" /* F9=Q9 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 4,160 \n\t" /* Indexing values */ + "lfpdx 10,3,%[remaining_quads] \n\t" /* F0=Q10 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 3,176 \n\t" /* Indexing values */ + "lfpdx 11,4,%[remaining_quads] \n\t" /* F1=Q11 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 4,192 \n\t" /* Indexing values */ + "lfpdx 12,3,%[remaining_quads] \n\t" /* F2=Q12 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 3,208 \n\t" /* Indexing values */ + "lfpdx 13,4,%[remaining_quads] \n\t" /* F3=Q13 load */ + "bdz 1 \n\t" /* Skip out if done */ + "li 4,224 \n\t" /* Indexing values */ + "lfpdx 14,3,%[remaining_quads] \n\t" /* F4=Q14 load */ + "bdz 1 \n\t" /* Skip out if done */ + "lfpdx 15,4,%[remaining_quads] \n" /* F5=Q15 load */ + "1: \n\t" /* Jump-out label */ + : + : [first_quad_0] "b" (first_quad_0) , /* Inputs */ + [first_quad_1] "b" (first_quad_1) , /* Inputs */ + [remaining_quads] "b" (remaining_quads) , + [quadcount] "r" (quadcount) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15","r3" , "r4" ); + } +static inline void bgcol_payload_inject_storeload(void *port, void* first_quad, void *remaining_quads) + { +/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */ +/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */ + asm volatile( + "stfpdx 0,0,%[port] \n\t" /* Q0 store to TR0_DI */ + "lfpdx 0,0,%[first_quad] \n\t" /* F0=Q0 load */ + "stfpdx 1,0,%[port] \n\t" /* Q1 store */ + "li 3,16 \n\t" /* Indexing values */ + "lfpdx 
1,0,%[remaining_quads] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "stfpdx 2,0,%[port] \n\t" /* Q2 store */ + "li 4,32 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q2 load */ + "stfpdx 3,0,%[port] \n\t" /* Q3 store */ + "li 3,48 \n\t" /* Indexing values */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q3 load */ + "stfpdx 4,0,%[port] \n\t" /* Q4 store */ + "li 4,64 \n\t" /* Indexing values */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q4 load */ + "stfpdx 5,0,%[port] \n\t" /* Q5 store */ + "li 3,80 \n\t" /* Indexing values */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q5 load */ + "stfpdx 6,0,%[port] \n\t" /* Q6 store */ + "li 4,96 \n\t" /* Indexing values */ + "lfpdx 6,3,%[remaining_quads] \n\t" /* F6=Q6 load */ + "stfpdx 7,0,%[port] \n\t" /* Q7 store */ + "li 3,112 \n\t" /* Indexing values */ + "lfpdx 7,4,%[remaining_quads] \n\t" /* F7=Q7 load */ + "stfpdx 8,0,%[port] \n\t" /* Q8 store */ + "li 4,128 \n\t" /* Indexing values */ + "lfpdx 8,3,%[remaining_quads] \n\t" /* F8=Q8 load */ + "stfpdx 9,0,%[port] \n\t" /* Q9 store */ + "li 3,144 \n\t" /* Indexing values */ + "lfpdx 9,4,%[remaining_quads] \n\t" /* F9=Q9 load */ + "stfpdx 10,0,%[port] \n\t" /* Q10 store */ + "li 4,160 \n\t" /* Indexing values */ + "lfpdx 10,3,%[remaining_quads] \n\t" /* F0=Q10 load */ + "stfpdx 11,0,%[port] \n\t" /* Q11 store */ + "li 3,176 \n\t" /* Indexing values */ + "lfpdx 11,4,%[remaining_quads] \n\t" /* F1=Q11 load */ + "stfpdx 12,0,%[port] \n\t" /* Q12 store */ + "li 4,192 \n\t" /* Indexing values */ + "lfpdx 12,3,%[remaining_quads] \n\t" /* F2=Q12 load */ + "stfpdx 13,0,%[port] \n\t" /* Q13 store */ + "li 3,208 \n\t" /* Indexing values */ + "lfpdx 13,4,%[remaining_quads] \n\t" /* F3=Q13 load */ + "stfpdx 14,0,%[port] \n\t" /* Q14 store */ + "li 4,224 \n\t" /* Indexing values */ + "lfpdx 14,3,%[remaining_quads] \n\t" /* F4=Q14 load */ + "stfpdx 15,0,%[port] \n\t" /* Q15 store */ + "lfpdx 15,4,%[remaining_quads] \n\t" /* F5=Q15 load */ + : + : 
[first_quad] "b" (first_quad) , /* Inputs */ + [remaining_quads] "b" (remaining_quads), + [port] "b" (port) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15","r3" , "r4" ); + } +static inline void bgcol_payload_inject_storeload2(void *port, double* first_quad_0, double* first_quad_1, void *remaining_quads) + { +/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */ +/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */ + asm volatile( + "stfpdx 0,0,%[port] \n\t" /* Q0 store to TR0_DI */ + "lfdx 0,0,%[first_quad_0] \n\t" /* F0=Q0 load */ + "lfsdx 0,0,%[first_quad_1] \n\t" /* F0=Q0 load */ + "stfpdx 1,0,%[port] \n\t" /* Q1 store */ + "li 3,16 \n\t" /* Indexing values */ + "lfpdx 1,0,%[remaining_quads] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "stfpdx 2,0,%[port] \n\t" /* Q2 store */ + "li 4,32 \n\t" /* Indexing values */ + "lfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q2 load */ + "stfpdx 3,0,%[port] \n\t" /* Q3 store */ + "li 3,48 \n\t" /* Indexing values */ + "lfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q3 load */ + "stfpdx 4,0,%[port] \n\t" /* Q4 store */ + "li 4,64 \n\t" /* Indexing values */ + "lfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q4 load */ + "stfpdx 5,0,%[port] \n\t" /* Q5 store */ + "li 3,80 \n\t" /* Indexing values */ + "lfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q5 load */ + "stfpdx 6,0,%[port] \n\t" /* Q6 store */ + "li 4,96 \n\t" /* Indexing values */ + "lfpdx 6,3,%[remaining_quads] \n\t" /* F6=Q6 load */ + "stfpdx 7,0,%[port] \n\t" /* Q7 store */ + "li 3,112 \n\t" /* Indexing values */ + "lfpdx 7,4,%[remaining_quads] \n\t" /* F7=Q7 load */ + "stfpdx 8,0,%[port] \n\t" /* Q8 store */ + "li 4,128 \n\t" /* Indexing values */ + "lfpdx 8,3,%[remaining_quads] \n\t" /* F8=Q8 load */ + "stfpdx 9,0,%[port] \n\t" /* Q9 store */ + "li 3,144 \n\t" /* Indexing values */ + "lfpdx 9,4,%[remaining_quads] \n\t" /* F9=Q9 load */ + "stfpdx 10,0,%[port] \n\t" /* Q10 store */ + "li 4,160 
\n\t" /* Indexing values */ + "lfpdx 10,3,%[remaining_quads] \n\t" /* F0=Q10 load */ + "stfpdx 11,0,%[port] \n\t" /* Q11 store */ + "li 3,176 \n\t" /* Indexing values */ + "lfpdx 11,4,%[remaining_quads] \n\t" /* F1=Q11 load */ + "stfpdx 12,0,%[port] \n\t" /* Q12 store */ + "li 4,192 \n\t" /* Indexing values */ + "lfpdx 12,3,%[remaining_quads] \n\t" /* F2=Q12 load */ + "stfpdx 13,0,%[port] \n\t" /* Q13 store */ + "li 3,208 \n\t" /* Indexing values */ + "lfpdx 13,4,%[remaining_quads] \n\t" /* F3=Q13 load */ + "stfpdx 14,0,%[port] \n\t" /* Q14 store */ + "li 4,224 \n\t" /* Indexing values */ + "lfpdx 14,3,%[remaining_quads] \n\t" /* F4=Q14 load */ + "stfpdx 15,0,%[port] \n\t" /* Q15 store */ + "lfpdx 15,4,%[remaining_quads] \n\t" /* F5=Q15 load */ + : + : [first_quad_0] "b" (first_quad_0) , /* Inputs */ + [first_quad_1] "b" (first_quad_1) , /* Inputs */ + [remaining_quads] "b" (remaining_quads), + [port] "b" (port) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15","r3" , "r4" ); + } +static inline void bgcol_payload_inject_store(void *port) + { + asm volatile( + "stfpdx 0,0,%[port] \n\t" /* Q0 store to TR0_DI */ + "stfpdx 1,0,%[port] \n\t" /* Q1 store */ + "stfpdx 2,0,%[port] \n\t" /* Q2 store */ + "stfpdx 3,0,%[port] \n\t" /* Q3 store */ + "stfpdx 4,0,%[port] \n\t" /* Q4 store */ + "stfpdx 5,0,%[port] \n\t" /* Q5 store */ + "stfpdx 6,0,%[port] \n\t" /* Q6 store */ + "stfpdx 7,0,%[port] \n\t" /* Q7 store */ + "stfpdx 8,0,%[port] \n\t" /* Q8 store */ + "stfpdx 9,0,%[port] \n\t" /* Q9 store */ + "stfpdx 10,0,%[port] \n\t" /* Q10 store */ + "stfpdx 11,0,%[port] \n\t" /* Q11 store */ + "stfpdx 12,0,%[port] \n\t" /* Q12 store */ + "stfpdx 13,0,%[port] \n\t" /* Q13 store */ + "stfpdx 14,0,%[port] \n\t" /* Q14 store */ + "stfpdx 15,0,%[port] \n\t" /* Q15 store */ + : + : /* inputs */ + [port] "b" (port) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", 
"fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15" ); + } + +/* receive a COL_FRAGPAYLOAD-byte payload */ +static inline void bgcol_payload_receive240(void *port, void *remaining_quads) + { +/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */ + asm volatile( + "lfpdx 1,0,%[port] \n\t" /* Q1 store */ + "lfpdx 2,0,%[port] \n\t" /* Q2 store */ + "lfpdx 3,0,%[port] \n\t" /* Q3 store */ + "lfpdx 4,0,%[port] \n\t" /* Q4 store */ + "lfpdx 5,0,%[port] \n\t" /* Q5 store */ + "lfpdx 6,0,%[port] \n\t" /* Q6 store */ + "lfpdx 7,0,%[port] \n\t" /* Q7 store */ + "lfpdx 8,0,%[port] \n\t" /* Q8 store */ + "lfpdx 9,0,%[port] \n\t" /* Q9 store */ + "lfpdx 0,0,%[port] \n\t" /* Q10 store */ + "li 3,16 \n\t" /* Indexing values */ + "stfpdx 1,0,%[remaining_quads] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li 4,32 \n\t" /* Indexing values */ + "stfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q2 load */ + "lfpdx 1,0,%[port] \n\t" /* Q11 store */ + "li 3,48 \n\t" /* Indexing values */ + "stfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q3 load */ + "lfpdx 2,0,%[port] \n\t" /* Q12 store */ + "li 4,64 \n\t" /* Indexing values */ + "stfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q4 load */ + "lfpdx 3,0,%[port] \n\t" /* Q13 store */ + "li 3,80 \n\t" /* Indexing values */ + "stfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q5 load */ + "lfpdx 4,0,%[port] \n\t" /* Q14 store */ + "li 4,96 \n\t" /* Indexing values */ + "stfpdx 6,3,%[remaining_quads] \n\t" /* F6=Q6 load */ + "lfpdx 5,0,%[port] \n\t" /* Q15 store */ + "li 3,112 \n\t" /* Indexing values */ + "stfpdx 7,4,%[remaining_quads] \n\t" /* F7=Q7 load */ + "li 4,128 \n\t" /* Indexing values */ + "stfpdx 8,3,%[remaining_quads] \n\t" /* F8=Q8 load */ + "li 3,144 \n\t" /* Indexing values */ + "stfpdx 9,4,%[remaining_quads] \n\t" /* F9=Q9 load */ + "li 4,160 \n\t" /* Indexing values */ + "stfpdx 0,3,%[remaining_quads] \n\t" /* F0=Q10 load */ + "li 3,176 \n\t" /* Indexing values */ + "stfpdx 1,4,%[remaining_quads] \n\t" /* F1=Q11 load */ + "li 4,192 
\n\t" /* Indexing values */ + "stfpdx 2,3,%[remaining_quads] \n\t" /* F2=Q12 load */ + "li 3,208 \n\t" /* Indexing values */ + "stfpdx 3,4,%[remaining_quads] \n\t" /* F3=Q13 load */ + "li 4,224 \n\t" /* Indexing values */ + "stfpdx 4,3,%[remaining_quads] \n\t" /* F4=Q14 load */ + "stfpdx 5,4,%[remaining_quads] \n\t" /* F5=Q15 load */ + : + : /* Inputs */ + [remaining_quads] "b" (remaining_quads), + [port] "b" (port) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "r3" , "r4" ); + } + + +/* Load a full bgcol payload into 16 parallel floating point registers */ +/* Caution ... the compiler doesn't know that we want the regs later on */ +static inline unsigned int bgcol_payload_load( + void *port, /* The FIFO port */ + void *lnkhdr, /* Where to put the first 16 bytes of the payload */ + void *destport /* Which address to tap to ask for the next packet */ + ) + { + unsigned int src_key ; + unsigned int dummy ; + struct { unsigned char c [16] ; } *lnkhdrc = lnkhdr ; +/* BUG_ON((((int)lnkhdr) & 0xf) != 0) ; */ + + asm ( + "lfpdx 0,0,%[port] \n\t" /* lnkhdr */ + "lfpdx 1,0,%[port] \n\t" /* Q1 store */ + "lfpdx 2,0,%[port] \n\t" /* Q2 store */ + "lfpdx 3,0,%[port] \n\t" /* Q3 store */ + "lfpdx 4,0,%[port] \n\t" /* Q4 store */ + "lfpdx 5,0,%[port] \n\t" /* Q5 store */ + "lfpdx 6,0,%[port] \n\t" /* Q6 store */ + "stfpdx 0,0,%[lnkhdr] \n\t" + "lfpdx 7,0,%[port] \n\t" /* Q7 store */ + "lfpdx 8,0,%[port] \n\t" /* Q8 store */ + "lfpdx 9,0,%[port] \n\t" /* Q9 store */ + "lfpdx 10,0,%[port] \n\t" /* Q10 store */ + "lfpdx 11,0,%[port] \n\t" /* Q11 store */ + "lfpdx 12,0,%[port] \n\t" /* Q12 store */ + "lwz %[src_key],4(%[lnkhdr]) \n\t" + "lfpdx 13,0,%[port] \n\t" /* Q13 store */ + "lfpdx 14,0,%[port] \n\t" /* Q14 store */ + "lfpdx 15,0,%[port] \n\t" /* Q15 store */ + "lwz %[dummy],0(%[destport]) \n\t" /* trigger to pull the next packet in */ + : /* outputs */ + [dummy] "=r" (dummy), + [src_key] "=b" (src_key), + "=m" (*lnkhdrc) + : /* 
Inputs */ + [port] "b" (port) , + [lnkhdr] "b" (lnkhdrc) , + [destport] "b" (destport) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15" + ); + TRACEN(k_t_fifocontents, "bgcol_payload_load src_key=%08x",src_key) ; + return src_key ; + } + +static inline unsigned int bgcol_payload_load2( + void *port, /* The FIFO port */ + double *lnkhdr0, /* Where to put the first 8 bytes of the payload */ + double *lnkhdr1, /* Where to put the second 8 bytes of the payload */ + void *destport /* Which address to tap to ask for the next packet */ + ) + { + unsigned int src_key ; + unsigned int dummy ; +/* BUG_ON((((int)lnkhdr0) & 0x07) != 0) ; */ +/* BUG_ON((((int)lnkhdr1) & 0x07) != 0) ; */ + + asm ( + "lfpdx 0,0,%[port] \n\t" /* lnkhdr */ + "lfpdx 1,0,%[port] \n\t" /* Q1 store */ + "lfpdx 2,0,%[port] \n\t" /* Q2 store */ + "lfpdx 3,0,%[port] \n\t" /* Q3 store */ + "lfpdx 4,0,%[port] \n\t" /* Q4 store */ + "lfpdx 5,0,%[port] \n\t" /* Q5 store */ + "lfpdx 6,0,%[port] \n\t" /* Q6 store */ + "stfdx 0,0,%[lnkhdr0] \n\t" + "lfpdx 7,0,%[port] \n\t" /* Q7 store */ + "stfsdx 0,0,%[lnkhdr1] \n\t" + "lfpdx 8,0,%[port] \n\t" /* Q8 store */ + "lfpdx 9,0,%[port] \n\t" /* Q9 store */ + "lfpdx 10,0,%[port] \n\t" /* Q10 store */ + "lfpdx 11,0,%[port] \n\t" /* Q11 store */ + "lfpdx 12,0,%[port] \n\t" /* Q12 store */ + "lwz %[src_key],4(%[lnkhdr0]) \n\t" + "lfpdx 13,0,%[port] \n\t" /* Q13 store */ + "lfpdx 14,0,%[port] \n\t" /* Q14 store */ + "lfpdx 15,0,%[port] \n\t" /* Q15 store */ + "lwz %[dummy],0(%[destport]) \n\t" /* trigger to pull the next packet in */ + : /* outputs */ + [dummy] "=r" (dummy), + [src_key] "=b" (src_key), + "=m" (*lnkhdr0), + "=m" (*lnkhdr1) + : /* Inputs */ + [port] "b" (port) , + [lnkhdr0] "b" (lnkhdr0) , + [lnkhdr1] "b" (lnkhdr1) , + [destport] "b" (destport) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + 
"fr12","fr13", "fr14", + "fr15" + ); + TRACEN(k_t_fifocontents, "bgcol_payload_load src_key=%08x",src_key) ; + return src_key ; + } + +/* Save the previous payload to store, and load the next payload from FIFO */ +static inline unsigned int bgcol_payload_storeload( + void *port, + void *lnkhdr, + void * payloadptr, + void *destport ) + { + unsigned int index1 ; + unsigned int index2 ; + unsigned int src_key ; + struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ; + struct { unsigned char c [16] ; } *lnkhdrc ; +/* BUG_ON((((int)lnkhdr) & 0xf) != 0) ; */ +/* BUG_ON((((int)payloadptr) & 0xf) != 0) ; */ + + lnkhdrc = lnkhdr ; + + payload = payloadptr; + TRACEN(k_t_fifocontents, "bgcol_payload_storeload payload=%p",payloadptr) ; + + asm ( + "lfpdx 0,0,%[port] \n\t" /* lnkhdr */ + "li %[index1],16 \n\t" /* Indexing values */ + "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li %[index2],32 \n\t" /* Indexing values */ + "lfpdx 1,0,%[port] \n\t" /* Q1 store */ + "stfpdx 2,%[index1],%[payload] \n\t" /* F2=Q2 load */ + "li %[index1],48 \n\t" /* Indexing values */ + "lfpdx 2,0,%[port] \n\t" /* Q2 store */ + "stfpdx 3,%[index2],%[payload] \n\t" /* F3=Q3 load */ + "li %[index2],64 \n\t" /* Indexing values */ + "lfpdx 3,0,%[port] \n\t" /* Q3 store */ + "stfpdx 4,%[index1],%[payload] \n\t" /* F4=Q4 load */ + "li %[index1],80 \n\t" /* Indexing values */ + "lfpdx 4,0,%[port] \n\t" /* Q4 store */ + "stfpdx 5,%[index2],%[payload] \n\t" /* F5=Q5 load */ + "li %[index2],96 \n\t" /* Indexing values */ + "lfpdx 5,0,%[port] \n\t" /* Q5 store */ + "stfpdx 6,%[index1],%[payload] \n\t" /* F6=Q6 load */ + "li %[index1],112 \n\t" /* Indexing values */ + "lfpdx 6,0,%[port] \n\t" /* Q6 store */ + "stfpdx 7,%[index2],%[payload] \n\t" /* F7=Q7 load */ + "li %[index2],128 \n\t" /* Indexing values */ + "lfpdx 7,0,%[port] \n\t" /* Q7 store */ + "stfpdx 8,%[index1],%[payload] \n\t" /* F8=Q8 load */ + "li %[index1],144 \n\t" /* Indexing values */ + "lfpdx 8,0,%[port] 
\n\t" /* Q8 store */ + "stfpdx 0,0,%[lnkhdr] \n\t" + "stfpdx 9,%[index2],%[payload] \n\t" /* F9=Q9 load */ + "li %[index2],160 \n\t" /* Indexing values */ + "lfpdx 9,0,%[port] \n\t" /* Q9 store */ + "stfpdx 10,%[index1],%[payload] \n\t" /* F0=Q10 load */ + "li %[index1],176 \n\t" /* Indexing values */ + "lfpdx 10,0,%[port] \n\t" /* Q10 store */ + "stfpdx 11,%[index2],%[payload] \n\t" /* F1=Q11 load */ + "li %[index2],192 \n\t" /* Indexing values */ + "lfpdx 11,0,%[port] \n\t" /* Q11 store */ + "stfpdx 12,%[index1],%[payload] \n\t" /* F2=Q12 load */ + "li %[index1],208 \n\t" /* Indexing values */ + "lfpdx 12,0,%[port] \n\t" /* Q12 store */ + "stfpdx 13,%[index2],%[payload] \n\t" /* F3=Q13 load */ + "li %[index2],224 \n\t" /* Indexing values */ + "lfpdx 13,0,%[port] \n\t" /* Q13 store */ + "lwz %[src_key],4(%[lnkhdr]) \n\t" + "stfpdx 14,%[index1],%[payload] \n\t" /* F4=Q14 load */ + "lfpdx 14,0,%[port] \n\t" /* Q14 store */ + "stfpdx 15,%[index2],%[payload] \n\t" /* F5=Q15 load */ + "lfpdx 15,0,%[port] \n\t" /* Q15 store */ + "lwz %[index1],0(%[destport]) \n\t" /* trigger to pull the next packet in */ + : /* outputs */ + [src_key] "=b" (src_key), + "=m" (*payload), + "=m" (*lnkhdrc), + [index1] "=b" (index1), + [index2] "=b" (index2) + : /* Inputs */ + [port] "b" (port) , + [payload] "b" (payload), + [lnkhdr] "b" (lnkhdrc) , + [destport] "b" (destport) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15" + ); + + TRACEN(k_t_fifocontents, "bgcol_payload_storeload src_key=%08x",src_key) ; + return src_key ; + } + +static inline unsigned int bgcol_payload_storeload2( + void *port, + double *lnkhdr0, + double *lnkhdr1, + void * payloadptr, + void *destport ) + { + unsigned int index1 ; + unsigned int index2 ; + unsigned int src_key ; + struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ; + /* BUG_ON((((int)lnkhdr0) & 0x07) != 0) ; */ + /* BUG_ON((((int)lnkhdr1) & 0x07) != 0) 
; */ +/* BUG_ON((((int)payloadptr) & 0xf) != 0) ; */ + + + payload = payloadptr; + TRACEN(k_t_fifocontents, "bgcol_payload_storeload payload=%p",payloadptr) ; + + asm ( + "lfpdx 0,0,%[port] \n\t" /* lnkhdr */ + "li %[index1],16 \n\t" /* Indexing values */ + "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li %[index2],32 \n\t" /* Indexing values */ + "lfpdx 1,0,%[port] \n\t" /* Q1 store */ + "stfpdx 2,%[index1],%[payload] \n\t" /* F2=Q2 load */ + "li %[index1],48 \n\t" /* Indexing values */ + "lfpdx 2,0,%[port] \n\t" /* Q2 store */ + "stfpdx 3,%[index2],%[payload] \n\t" /* F3=Q3 load */ + "li %[index2],64 \n\t" /* Indexing values */ + "lfpdx 3,0,%[port] \n\t" /* Q3 store */ + "stfpdx 4,%[index1],%[payload] \n\t" /* F4=Q4 load */ + "li %[index1],80 \n\t" /* Indexing values */ + "lfpdx 4,0,%[port] \n\t" /* Q4 store */ + "stfpdx 5,%[index2],%[payload] \n\t" /* F5=Q5 load */ + "li %[index2],96 \n\t" /* Indexing values */ + "lfpdx 5,0,%[port] \n\t" /* Q5 store */ + "stfpdx 6,%[index1],%[payload] \n\t" /* F6=Q6 load */ + "li %[index1],112 \n\t" /* Indexing values */ + "lfpdx 6,0,%[port] \n\t" /* Q6 store */ + "stfpdx 7,%[index2],%[payload] \n\t" /* F7=Q7 load */ + "li %[index2],128 \n\t" /* Indexing values */ + "lfpdx 7,0,%[port] \n\t" /* Q7 store */ + "stfpdx 8,%[index1],%[payload] \n\t" /* F8=Q8 load */ + "li %[index1],144 \n\t" /* Indexing values */ + "lfpdx 8,0,%[port] \n\t" /* Q8 store */ + "stfdx 0,0,%[lnkhdr0] \n\t" + "stfpdx 9,%[index2],%[payload] \n\t" /* F9=Q9 load */ + "li %[index2],160 \n\t" /* Indexing values */ + "lfpdx 9,0,%[port] \n\t" /* Q9 store */ + "stfsdx 0,0,%[lnkhdr1] \n\t" + "stfpdx 10,%[index1],%[payload] \n\t" /* F0=Q10 load */ + "li %[index1],176 \n\t" /* Indexing values */ + "lfpdx 10,0,%[port] \n\t" /* Q10 store */ + "stfpdx 11,%[index2],%[payload] \n\t" /* F1=Q11 load */ + "li %[index2],192 \n\t" /* Indexing values */ + "lfpdx 11,0,%[port] \n\t" /* Q11 store */ + "stfpdx 12,%[index1],%[payload] \n\t" /* F2=Q12 load 
*/ + "li %[index1],208 \n\t" /* Indexing values */ + "lfpdx 12,0,%[port] \n\t" /* Q12 store */ + "stfpdx 13,%[index2],%[payload] \n\t" /* F3=Q13 load */ + "li %[index2],224 \n\t" /* Indexing values */ + "lfpdx 13,0,%[port] \n\t" /* Q13 store */ + "lwz %[src_key],4(%[lnkhdr0]) \n\t" + "stfpdx 14,%[index1],%[payload] \n\t" /* F4=Q14 load */ + "lfpdx 14,0,%[port] \n\t" /* Q14 store */ + "stfpdx 15,%[index2],%[payload] \n\t" /* F5=Q15 load */ + "lfpdx 15,0,%[port] \n\t" /* Q15 store */ + "lwz %[index1],0(%[destport]) \n\t" /* trigger to pull the next packet in */ + : /* outputs */ + [src_key] "=b" (src_key), + "=m" (*payload), + "=m" (*lnkhdr0), + "=m" (*lnkhdr1), + [index1] "=b" (index1), + [index2] "=b" (index2) + : /* Inputs */ + [port] "b" (port) , + [payload] "b" (payload), + [lnkhdr0] "b" (lnkhdr0) , + [lnkhdr1] "b" (lnkhdr1) , + [destport] "b" (destport) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15" + ); + + TRACEN(k_t_fifocontents, "bgcol_payload_storeload src_key=%08x",src_key) ; + return src_key ; + } + +/* Save the previous payload to store */ +static inline void bgcol_payload_store( + void * payloadptr) + { + unsigned int index1 ; + unsigned int index2 ; + struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload=payloadptr ; +/* BUG_ON((((int)payloadptr) & 0xf) != 0) ; */ + + TRACEN(k_t_fifocontents, "bgcol_payload_store payload=%p",payload) ; + asm ( + "li %[index1],16 \n\t" /* Indexing values */ + "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li %[index2],32 \n\t" /* Indexing values */ + "stfpdx 2,%[index1],%[payload] \n\t" /* F2=Q2 load */ + "li %[index1],48 \n\t" /* Indexing values */ + "stfpdx 3,%[index2],%[payload] \n\t" /* F3=Q3 load */ + "li %[index2],64 \n\t" /* Indexing values */ + "stfpdx 4,%[index1],%[payload] \n\t" /* F4=Q4 load */ + "li %[index1],80 \n\t" /* Indexing values */ + "stfpdx 5,%[index2],%[payload] \n\t" /* 
F5=Q5 load */ + "li %[index2],96 \n\t" /* Indexing values */ + "stfpdx 6,%[index1],%[payload] \n\t" /* F6=Q6 load */ + "li %[index1],112 \n\t" /* Indexing values */ + "stfpdx 7,%[index2],%[payload] \n\t" /* F7=Q7 load */ + "li %[index2],128 \n\t" /* Indexing values */ + "stfpdx 8,%[index1],%[payload] \n\t" /* F8=Q8 load */ + "li %[index1],144 \n\t" /* Indexing values */ + "stfpdx 9,%[index2],%[payload] \n\t" /* F9=Q9 load */ + "li %[index2],160 \n\t" /* Indexing values */ + "stfpdx 10,%[index1],%[payload] \n\t" /* F0=Q10 load */ + "li %[index1],176 \n\t" /* Indexing values */ + "stfpdx 11,%[index2],%[payload] \n\t" /* F1=Q11 load */ + "li %[index2],192 \n\t" /* Indexing values */ + "stfpdx 12,%[index1],%[payload] \n\t" /* F2=Q12 load */ + "li %[index1],208 \n\t" /* Indexing values */ + "stfpdx 13,%[index2],%[payload] \n\t" /* F3=Q13 load */ + "li %[index2],224 \n\t" /* Indexing values */ + "stfpdx 14,%[index1],%[payload] \n\t" /* F4=Q14 load */ + "stfpdx 15,%[index2],%[payload] \n\t" /* F5=Q15 load */ + : /* outputs */ + "=m" (*payload), + [index1] "=b" (index1), + [index2] "=b" (index2) + : /* Inputs */ + [payload] "b" (payload) /* inputs */ + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15" + ); + } + +/* receive 256 bytes, a 16-byte header and a 240-byte payload */ +/* returns the 'source key', the key of the node which sent the data */ + +static inline int bgcol_payload_receive256(void *port, + void *lnkhdr, + unsigned char * payload_table[], + unsigned int table_index_mask, + void *destport ) + { + int table_offset ; + int src_key ; + struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ; + struct { unsigned char c [16] ; } *lnkhdrc = lnkhdr ; + + asm ( + "lfpdx 0,0,%[port] \n\t" /* lnkhdr */ + "lfpdx 1,0,%[port] \n\t" /* Q1 store */ + "lfpdx 2,0,%[port] \n\t" /* Q2 store */ + "lfpdx 3,0,%[port] \n\t" /* Q3 store */ + "lfpdx 4,0,%[port] \n\t" /* Q4 store */ + 
"lfpdx 5,0,%[port] \n\t" /* Q5 store */ + "lfpdx 6,0,%[port] \n\t" /* Q6 store */ + "stfpdx 0,0,%[lnkhdr] \n\t" + "lfpdx 7,0,%[port] \n\t" /* Q7 store */ + "lfpdx 8,0,%[port] \n\t" /* Q8 store */ + "lfpdx 9,0,%[port] \n\t" /* Q9 store */ + "lwz %[src_key],4(%[lnkhdr]) \n\t" + "lfpdx 10,0,%[port] \n\t" /* Q10 store */ + "lfpdx 11,0,%[port] \n\t" /* Q11 store */ + "lfpdx 12,0,%[port] \n\t" /* Q12 store */ + "and 3,%[src_key],%[table_index_mask] \n\t" + "lfpdx 13,0,%[port] \n\t" /* Q13 store */ + "slwi %[table_offset],3,2 \n\t" + "lfpdx 14,0,%[port] \n\t" /* Q14 store */ + "lwzx %[payload],%[table_offset],%[payload_table] \n\t" + "lfpdx 15,0,%[port] \n\t" /* Q15 store */ + "lwz 5,0(%[destport]) \n\t" /* trigger to pull the next packet in */ + "li 3,16 \n\t" /* Indexing values */ + "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li 4,32 \n\t" /* Indexing values */ + "stfpdx 2,3,%[payload] \n\t" /* F2=Q2 load */ + "li 3,48 \n\t" /* Indexing values */ + "stfpdx 3,4,%[payload] \n\t" /* F3=Q3 load */ + "li 4,64 \n\t" /* Indexing values */ + "stfpdx 4,3,%[payload] \n\t" /* F4=Q4 load */ + "li 3,80 \n\t" /* Indexing values */ + "stfpdx 5,4,%[payload] \n\t" /* F5=Q5 load */ + "li 4,96 \n\t" /* Indexing values */ + "stfpdx 6,3,%[payload] \n\t" /* F6=Q6 load */ + "li 3,112 \n\t" /* Indexing values */ + "stfpdx 7,4,%[payload] \n\t" /* F7=Q7 load */ + "li 4,128 \n\t" /* Indexing values */ + "stfpdx 8,3,%[payload] \n\t" /* F8=Q8 load */ + "li 3,144 \n\t" /* Indexing values */ + "stfpdx 9,4,%[payload] \n\t" /* F9=Q9 load */ + "li 4,160 \n\t" /* Indexing values */ + "stfpdx 10,3,%[payload] \n\t" /* F0=Q10 load */ + "li 3,176 \n\t" /* Indexing values */ + "stfpdx 11,4,%[payload] \n\t" /* F1=Q11 load */ + "li 4,192 \n\t" /* Indexing values */ + "stfpdx 12,3,%[payload] \n\t" /* F2=Q12 load */ + "li 3,208 \n\t" /* Indexing values */ + "stfpdx 13,4,%[payload] \n\t" /* F3=Q13 load */ + "li 4,224 \n\t" /* Indexing values */ + "stfpdx 14,3,%[payload] \n\t" /* 
F4=Q14 load */ + "stfpdx 15,4,%[payload] \n\t" /* F5=Q15 load */ + : [payload] "=b" (payload), /* outputs */ + [src_key] "=b" (src_key), + [table_offset] "=b" (table_offset), + "=m" (*payload), + "=m" (*lnkhdrc) + : /* Inputs */ + [port] "b" (port) , + [lnkhdr] "b" (lnkhdrc) , + [payload_table] "b" (payload_table) , + [table_index_mask] "b" (table_index_mask) , + [destport] "b" (destport) + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", + "fr15", + "r3" , "r4", "r5" + ); + TRACEN(k_t_fifocontents, "bgcol_payload_receive256 table_offset=%08x payload=%p\n src_key=%08x",table_offset,payload,src_key) ; + return src_key ; + } +/********************************************************************** + * Receive and transmit + **********************************************************************/ + +/* #if defined(COLLECTIVE_DELIVER_VIA_TASKLET) */ +/* static void bgcol_receive_proto_tasklet_handler(unsigned long dummy) */ +/* { */ +/* struct bg_col *bgcol = __bgcol; */ +/* struct sk_buff *skb = skb_dequeue(&bgcol->fragskb_list_rcv); */ +/* */ +/* TRACE("bgnet: (>)bgcol_receive_proto_tasklet_handler"); */ +/* */ +/* while( skb ) */ +/* { */ +// /* deliver to upper protocol layers */ +/* struct bglink_hdr_col *lnkhdrp = (struct bglink_hdr_col *)&(skb->cb) ; */ +/* struct bglink_proto *proto; */ +/* proto = bgcol_find_linkproto(lnkhdrp->lnk_proto); */ +/* if (proto) */ +/* { */ +/* */ +/* TRACE("Handed to proto rcv=%p", proto->rcv) ; */ +/* TRACE("hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x proto=%x", lnkhdrp->conn_id, lnkhdrp->this_pkt, lnkhdrp->total_pkt, lnkhdrp->dst_key, lnkhdrp->src_key, lnkhdrp->lnk_proto); */ +/* dump_skb_partial(skb,64) ; */ +/* TRACE("proto->rcv=%p skb=%p lnkhdrp=%p proto=%p", */ +/* proto->rcv,skb, lnkhdrp, proto */ +/* ) ; */ +/* (void) proto->rcv(skb, lnkhdrp, proto); */ +/* } */ +/* else */ +/* { */ +/* dump_skb_partial(skb,64); */ +/* TRACE("bgcol: 
unsupported link protocol (%p) %x", proto, lnkhdrp->lnk_proto); */ +/* dev_kfree_skb(skb); */ +/* } */ +/* skb = skb_dequeue(&bgcol->fragskb_list_rcv) ; */ +/* } */ +/* */ +/* TRACE("bgnet: (<)bgcol_receive_proto_tasklet_handler"); */ +/* */ +/* } */ +/* */ +/* static DECLARE_TASKLET(bgcol_receive_proto_tasklet,bgcol_receive_proto_tasklet_handler,0); */ +/* #endif */ + +static inline void bgcol_vacate_slot(struct bg_col *bgcol, unsigned int slot) + { + bgcol->per_eth_table[slot].payload = (void *)0xffffffff ; /* so we get a trap if we try to store through it */ + bgcol->per_eth_table[slot].expect = 0xffffffff ; + bgcol->skb_rcv_table[slot] = NULL ; + TRACEN(k_t_general,"Slot %d vacated",slot ); + } + + +static void init_ethkey_table(struct bg_col *bgcol) + { + int x ; + for( x = 0 ; x < k_ethkey_table_size ; x += 1) + { + bgcol_vacate_slot(bgcol,x) ; + } + } + +#if defined(KEEP_LNKHDR_TRAIL) +static struct bglink_hdr_col lnkhdr_trail[k_lnkhdr_trail_length] ; +static unsigned int lnkhdr_trail_index ; +static unsigned int lnkhdr_trail_shown_index ; +static int trail_shown_count ; + +static void record_lnkhdr_trail(struct bglink_hdr_col *lnkhdr) + { + lnkhdr_trail[lnkhdr_trail_index & (k_lnkhdr_trail_length-1)] = *lnkhdr ; + lnkhdr_trail_index += 1 ; + } + +static void show_lnkhdr_trail(const char * reason) + { + if( trail_shown_count < k_lnhhdr_ffdc_limit ) + { + unsigned int trail_count = (k_lnkhdr_trail_display_length > lnkhdr_trail_index) ? 
lnkhdr_trail_index : k_lnkhdr_trail_display_length ; + unsigned int current_index = lnkhdr_trail_index - trail_count ; + printk(KERN_INFO "lnkhdr trail to packet %d, reason <%s>:\n", lnkhdr_trail_index, reason) ; + while( current_index != lnkhdr_trail_index) + { + unsigned int x = ( current_index & (k_lnkhdr_trail_length-1)) ; + if( current_index >= lnkhdr_trail_shown_index ) + { + printk(KERN_INFO "lnkhdr_trail[%02x] dst_key=%08x src_key=%08x conn_id=%04x this_pkt=%02x total_pkt=%02x lnk_proto=%04x opt=[%02x:%02x:%02x]\n", + (current_index-lnkhdr_trail_index) & 0xff, + lnkhdr_trail[x].dst_key, + lnkhdr_trail[x].src_key, + lnkhdr_trail[x].conn_id, + lnkhdr_trail[x].this_pkt, + lnkhdr_trail[x].total_pkt, + lnkhdr_trail[x].lnk_proto, + lnkhdr_trail[x].opt.opt_net.option, + lnkhdr_trail[x].opt.opt_net.pad_head, + lnkhdr_trail[x].opt.opt_net.pad_tail + ) ; + } + current_index += 1 ; + + } + trail_shown_count += 1 ; + lnkhdr_trail_shown_index = lnkhdr_trail_index ; + } + } + +static void show_payload(void * payload, unsigned int mioaddr) +{ + if( trail_shown_count < k_lnhhdr_ffdc_limit ) + { + unsigned int *pi=(unsigned int *) payload ; + unsigned int x ; + for(x=0; x<240/sizeof(unsigned int)-9; x+=8) + { + printk(KERN_INFO "payload [%08x %08x %08x %08x %08x %08x %08x %08x]\n", + pi[x],pi[x+1],pi[x+2],pi[x+3],pi[x+4],pi[x+5],pi[x+6],pi[x+7] + ) ; + } ; + printk(KERN_INFO "payload [%08x %08x %08x %08x]\n", + pi[x],pi[x+1],pi[x+2],pi[x+3] + ) ; + } +} +#else +static inline void record_lnkhdr_trail(struct bglink_hdr_col *lnkhdr) + { + + } +static inline void show_lnkhdr_trail(const char * reason) + { + TRACE("%s", reason); + } +static void show_payload(void * payload, unsigned int mioaddr) +{ + TRACE("payload=%p mioaddr=0x%08x", payload, mioaddr); +} + +#endif + +#if !defined(COLLECTIVE_DELIVER_VIA_TASKLET) +static inline void bgcol_deliver_directly(struct bg_col *bgcol,struct bglink_hdr_col *lnkhdr, struct sk_buff *skb) + { + struct bglink_proto *proto; + + /* deliver to 
upper protocol layers */ + proto = bglink_find_proto(lnkhdr->lnk_proto); + if(!bgcol->deliver_without_workqueue) + { + TRACEN(k_t_general,"Delivering skb=%p via work queue",skb) ; + bgcol_deliver_via_workqueue(skb, lnkhdr,proto) ; + + } + else + { + if (proto) + { + TRACE("Handed to proto=%p", proto) ; + TRACE("hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x proto=%x", lnkhdr->conn_id, lnkhdr->this_pkt, lnkhdr->total_pkt, lnkhdr->dst_key, lnkhdr->src_key, lnkhdr->lnk_proto); + dump_skb_partial(skb,64) ; + TRACE("proto->col_rcv=%p skb=%p lnkhdr=%p proto=%p", + proto->col_rcv,skb, lnkhdr, proto + ) ; + (void) proto->col_rcv(bgcol,skb, lnkhdr, proto); + /* enable_kernel_fp() ; */ + } + else + { + dump_skb_partial(skb,64); + TRACE("bgcol: unsupported link protocol (%p) %x", proto, lnkhdr->lnk_proto); + dev_kfree_skb(skb); + } + replenish_list_for_filling(bgcol) ; + } + } +#endif + +static char scratch_payload[COL_FRAGPAYLOAD] __attribute__((aligned(16))); +static inline int bgcol_receive_mark3(struct bg_col *bgcol, unsigned channel,unsigned int status_in, unsigned int mioaddr) +{ + void *payloadptr; +/* union bgcol_status status; */ + unsigned int unload_count ; + unsigned int unload_index ; + struct bglink_hdr_col lnkhdr __attribute__((aligned(8))); + double *lnkhdrd = (double *)&lnkhdr ; + unsigned int total_unload_count = 0 ; + unsigned int end_frame_hint = 0 ; +#if defined(KEEP_RECV_TOTAL) + unsigned int recv_total = bgcol->recv_total ; +#endif +/* bgcol->recv_total += total_unload_count ; */ + +/* status.raw = status_in ; */ +/* unload_count = status.x.rcv_hdr ; */ + unload_count = bgcol_status_rcv_hdr(status_in) ; +/* bgcol->recv_fifo_histogram2[unload_count & 0x0f ] += 1; */ + TRACE("status=%08x", status_in); + +#if defined(KEEP_RECV_TOTAL) + bgcol->recv_total = recv_total + unload_count ; /* Not exact, for the case where we exit the loop early, but good enought for statistics */ +#endif +#if defined(COLLECTIVE_ONEPASS_TXRX) + if(unload_count > 0) +#else + 
while(unload_count > 0) +#endif + { + unsigned int received_src_key ; + unsigned int slot ; + unsigned int received_seq ; + unsigned int expected_seq ; + + unsigned int seq_next_packet ; + unsigned int seq_tot_packet ; + unsigned char * deposited_payload ; + unsigned char * next_payload ; + unsigned int received ; + unsigned int expected ; + + /* Load up the FP regs with the first packet from the FIFO, and get ready to analyze it */ + received_src_key=bgcol_payload_load2((void*)mioaddr + _BGP_TRx_DR,lnkhdrd,lnkhdrd+1,(void*)(mioaddr + _BGP_TRx_HR)) ; +#if defined(KEEP_LNKHDR_TRAIL) + record_lnkhdr_trail(&lnkhdr) ; +#endif + slot = received_src_key & (k_ethkey_table_size-1) ; + received = ((unsigned int *)&lnkhdr)[2] ; + expected = bgcol->per_eth_table[slot].expect ; + /* Find if it was an 'expected' packet in context of previous packets from this source */ + received_seq = ( received >> 8 ) & 0xff ; + expected_seq = ( expected >> 8 ) & 0xff ; + seq_tot_packet = expected & 0xff ; + seq_next_packet = expected_seq + 1 ; + + bgcol->per_eth_table[slot].expect = expected + 0x0100 ; + + deposited_payload = bgcol->per_eth_table[slot].payload ; + next_payload = deposited_payload + COL_FRAGPAYLOAD ; + + TRACEN(k_t_detail,"slot=%08x seq(%x,%x) re(%08x,%08x)", + slot, + received_seq, expected_seq, + received, expected + ) ; + + if( ( received == expected ) && (seq_next_packet < seq_tot_packet) ) + { + bgcol->per_eth_table[slot].payload = next_payload ; + for(unload_index=1;unload_index<unload_count;unload_index+=1) + { + /* This is the busiest loop. Keep it simple .... 
*/ + /* save the payload to store, and load up the next one */ + received_src_key=bgcol_payload_storeload2( + (void*)mioaddr + _BGP_TRx_DR, + lnkhdrd, + lnkhdrd+1, + deposited_payload, + (void*)(mioaddr + _BGP_TRx_HR)) ; +#if defined(KEEP_LNKHDR_TRAIL) + record_lnkhdr_trail(&lnkhdr) ; +#endif + slot = received_src_key & (k_ethkey_table_size-1) ; + received = ((unsigned int *)&lnkhdr)[2] ; + expected = bgcol->per_eth_table[slot].expect ; + /* Find if it was an 'expected' packet in context of previous packets from this source */ + expected_seq = ( expected >> 8 ) & 0xff ; + seq_tot_packet = expected & 0xff ; + deposited_payload = bgcol->per_eth_table[slot].payload ; + + seq_next_packet = expected_seq + 1 ; + + + next_payload = deposited_payload + COL_FRAGPAYLOAD ; + + TRACEN(k_t_detail,"slot=%08x seq(%x,%x) re(%08x,%08x)", + slot, + received_seq, expected_seq, + received, expected + ) ; + if( received != expected ) break ; + bgcol->per_eth_table[slot].payload = next_payload ; + bgcol->per_eth_table[slot].expect = expected + 0x0100 ; + if( seq_next_packet >= seq_tot_packet ) break ; + } + total_unload_count += unload_index ; + } + else + { + total_unload_count += 1 ; + } + + TRACE("slot=%08x seq(%x,%x) re(%08x,%08x)", + slot, + received_seq, expected_seq, + received, expected + ) ; + +/* We have registers loaded, and we have exited the busy loop for one of a number of reasons + * 1) This is the last packet of a frame + * 2) We have unloaded everything that the status word said was in the FIFO + * 3) This packet doesn't continue the previous frame from this source properly + * a) This is the first packet of a frame, and there was no frame in progress + * b) The sender has sent packets in a sequence that we do not understand + * + * Diagnose which, and handle appropriately + */ + end_frame_hint = 0 ; + if( received == expected && ((unsigned int)deposited_payload) != 0xffffffff ) + { + /* Things are going well, put the payload down into memory, and work out what to do 
with it */ + TRACE("Putting payload down at %p", deposited_payload); + + bgcol_payload_store(deposited_payload) ; + if( seq_next_packet >= seq_tot_packet) + { + /* Frame is complete. Deliver it up a layer */ + struct sk_buff *skb = bgcol->skb_rcv_table[slot] ; + if( seq_next_packet > seq_tot_packet) + { + TRACEN(k_t_request,"(!!!) seq_next_packet=%d seq_tot_packet=%d", + seq_next_packet, seq_tot_packet) ; + } +/* BUG_ON(seq_next_packet > seq_tot_packet) ; // we think we checked this as we went along; firewall report here */ + TRACEN(k_t_general,"Frame is complete"); + #if defined(COLLECTIVE_DELIVER_VIA_TASKLET) + skb_queue_tail(&bgcol->fragskb_list_rcv, skb) ; + TRACEN(k_t_general,"scheduling proto tasklet"); + tasklet_schedule(&bgcol_receive_proto_tasklet); + #else + bgcol_deliver_directly(bgcol,&lnkhdr, skb) ; + #endif + /* and tag the slot as vacant */ + bgcol_vacate_slot(bgcol,slot) ; + /* 'break' here should cause the interrupt handler to return */ + /* this CPU can then deliver the frame and the next CPU can take up */ + /* draining the bgcol */ +#if defined(COLLECTIVE_BREAK_ON_FRAME) + break ; +#endif + end_frame_hint = 1 ; + } + } + else + { + if( received == expected ) + { + /* Packet looked good, but destination address was 0xffffffff. Diagnose it ... */ + TRACEN(k_t_protocol,"Unexpected dest address 0xffffffff, received=0x%08x", received) ; + TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key); + } + /* The packet wasn't in sequence with previous packets from the source. Look to see if we can handle it */ + if( 0 == lnkhdr.this_pkt ) + { + if ( lnkhdr.total_pkt * COL_FRAGPAYLOAD + COL_SKB_ALIGN <= bgcol->mtu) + { + if( 1 == lnkhdr.total_pkt ) + { + struct sk_buff *skb = bgcol->skb_mini ; + /* We have a single-packet frame. 
Use 'skb_mini' and send it on */ + if( skb ) + { + skb_reserve(skb, COL_SKB_ALIGN - ((unsigned int)(skb->data)) % COL_SKB_ALIGN); + payloadptr = skb_put(skb, COL_FRAGPAYLOAD); + TRACE("Putting payload in mini slot at %p", payloadptr); + bgcol_payload_store(payloadptr) ; + #if defined(COLLECTIVE_DELIVER_VIA_TASKLET) + kept_lnkhdrp = (struct bglink_hdr_col *)(&(skb->cb)) ; + *kept_lnkhdrp = lnkhdr ; + skb_queue_tail(&bgcol->fragskb_list_rcv, skb) ; + TRACE("scheduling proto tasklet"); + tasklet_schedule(&bgcol_receive_proto_tasklet); + #else + bgcol_deliver_directly(bgcol,&lnkhdr, skb) ; + #endif + } +/* bgcol->skb_mini = alloc_skb(COL_FRAGPAYLOAD + COL_SKB_ALIGN , GFP_KERNEL | GFP_ATOMIC ) ; */ + bgcol->skb_mini = take_skb_from_list_for_filling(bgcol) ; + end_frame_hint = 1 ; + /* If there was a partial frame in the underneath skbuff, it can be left for */ + /* completion later. This doesn't seem likely; but the receive logic will work for it. */ + } + else + { + /* Put the payload down at the beginning of the skb we had up our sleeve */ + struct sk_buff *skb = bgcol->skb_in_waiting ; + if( skb && (skb_tailroom(skb) >= lnkhdr.total_pkt * COL_FRAGPAYLOAD + COL_SKB_ALIGN ) ) + { + struct bglink_hdr_col *kept_lnkhdrp ; + int size = lnkhdr.total_pkt * COL_FRAGPAYLOAD ; + skb_reserve(skb, COL_SKB_ALIGN - ((unsigned int)(skb->data)) % COL_SKB_ALIGN); + payloadptr = skb_put(skb, size); + kept_lnkhdrp = (struct bglink_hdr_col *)(&(skb->cb)) ; + *kept_lnkhdrp = lnkhdr ; + TRACE("Putting payload in waiting slot at %p", payloadptr); + bgcol_payload_store(payloadptr) ; + } + else + { + if( skb ) dev_kfree_skb(skb) ; /* Maybe someone upped the MTU on us */ + skb = NULL ; + } +/* bgcol->skb_in_waiting = alloc_skb( */ +/* k_use_plentiful_skb ? 
k_plentiful_skb_size : bgcol->mtu */ +// , GFP_KERNEL | GFP_ATOMIC); /* And grab a new one */ + bgcol->skb_in_waiting = take_skb_from_list_for_filling(bgcol) ; + if( skb ) + { + /* If there's a part-arrived frame, trample it */ + if( bgcol->skb_rcv_table[slot] ) + { + TRACEN(k_t_protocol,"Dropping previous partial frame"); + TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key); + TRACEN(k_t_protocol,"expected slot=%d re=(%08x,%08x)", slot, received, expected); + show_lnkhdr_trail("partial frame") ; + { + struct bgnet_dev *bgnet = bgcol->bgnet ; + bgnet->stats.rx_errors += 1; + bgnet->stats.rx_missed_errors += 1; + } + + dev_kfree_skb(bgcol->skb_rcv_table[slot]) ; + } + + + + /* Set things up for the fast loop */ + bgcol->skb_rcv_table[slot]=skb ; + bgcol->per_eth_table[slot].payload = payloadptr+COL_FRAGPAYLOAD ; + bgcol->per_eth_table[slot].expect = (lnkhdr.conn_id << 16) | (1 << 8) | (lnkhdr.total_pkt) ; + TRACE("Saved first packet of new frame, next bgcol->per_eth_table[%d]={%p,%08x}", slot, bgcol->per_eth_table[slot].payload,bgcol->per_eth_table[slot].expect); + + } + + else + { + TRACEN(k_t_protocol,"No skbuff memory available, dropping packet"); + TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key); + bgcol->recv_no_skbuff += 1 ; + bgcol->bgnet->stats.rx_dropped += 1; + bgcol->bgnet->stats.rx_errors += 1; + } + } + } + else + { + bgcol_payload_store(scratch_payload) ; + TRACEN(k_t_protocol,"Frame larger than MTU, dropping"); + show_lnkhdr_trail("Frame larger than MTU") ; + show_payload(scratch_payload,mioaddr) ; + bgcol->bgnet->stats.rx_errors += 1; + bgcol->bgnet->stats.rx_over_errors += 1; + } + } + + else + { + /* Unexpected mid-frame packet */ + bgcol_payload_store(scratch_payload) ; + TRACEN(k_t_protocol,"Unexpected 
packet from middle of frame, dropping"); + show_lnkhdr_trail("Unexpected packet from middle of frame") ; + show_payload(scratch_payload,mioaddr) ; + bgcol->bgnet->stats.rx_errors += 1; + bgcol->bgnet->stats.rx_fifo_errors += 1; + } + } + + + /* We have handled the reason why the 'fast loop' dropped out. Refresh the status */ +#if !defined(COLLECTIVE_ONEPASS_TXRX) + /* and redrive the 'fast loop' if there is anything in the fifo. */ +/* status.raw = in_be32_nosync((unsigned*)(mioaddr + _BGP_TRx_Sx)); */ +/* unload_count = status.x.rcv_hdr ; */ + unload_count = bgcol_status_rcv_hdr(*(unsigned*)(mioaddr + _BGP_TRx_Sx)) ; +/* bgcol->recv_fifo_histogram3[unload_count & 0x0f ] += 1; */ +#endif + } +/* bgcol->recv_total += total_unload_count ; */ +/* Return the number of packets we unloaded, and set the high bit if we have */ +/* reason to think there's nothing coming in any time soon */ + return total_unload_count + | ( ( end_frame_hint && (unload_count == total_unload_count ) ) + ? 0x80000000 : 0 + ) ; + +} + + + +/* Attempting to free skbuffs in an interrupt handler doesn't work well, some 'destructor' callbacks */ +/* protest if they are driven at interrupt level. So we queue them to be freed later. 
*/ +#ifndef COLLECTIVE_TRANSMIT_WITH_SLIH +static void bgcol_completed_buffer_handler(unsigned long dummy) + { + struct bg_col* bgcol=__bgcol ; + TRACE("(>)[%s:%d]",__func__, __LINE__) ; + /* Free any skbufs the transmit interrupt handler has finished with */ + { + struct sk_buff *freeskb = skb_dequeue(&(bgcol->skb_list_free) ) ; + while (freeskb) + { + TRACEN(k_t_irqflow,"Freeing skb=%p", freeskb) ; + dump_skb_partial(freeskb,64) ; + dev_kfree_skb(freeskb) ; + freeskb = skb_dequeue(&(bgcol->skb_list_free) ) ; + } + } + TRACE("(<)[%s:%d]",__func__, __LINE__) ; + } +static DECLARE_TASKLET(bgcol_completed_buffer_tasklet,bgcol_completed_buffer_handler,0) ; +#endif + +/* static char local_payload[COL_FRAGPAYLOAD] __attribute__((aligned(16))) ; */ +static void bgcol_xmit_next_skb(struct bg_col* bgcol) + { + if(! skb_queue_empty(&(bgcol->skb_list_xmit))) + { + struct sk_buff *skb = skb_dequeue(&(bgcol->skb_list_xmit) ) ; + struct bgnet_dev *bgnet = bgcol->bgnet ; + unsigned int i_am_compute_node = (bgnet->bgcol_vector ^ bgnet->eth_bridge_vector) & 0x00ffffff ; + TRACE("bgcol_xmit_next_skb bgcol_vector=0x%08x eth_bridge_vector=0x%08x i_am_compute_node=%08x", + bgnet->bgcol_vector,bgnet->eth_bridge_vector,i_am_compute_node + ) ; + bgcol->skb_current_xmit=skb ; + if( skb ) + { + unsigned long offset; + union bgcol_header dest ; + struct ethhdr *eth = (struct ethhdr *)skb->data; + /* Work out what bgcol header to use for the new skb */ + + TRACEN(k_t_irqflow,"%s: skb=%p, eth=%p, bgnet=%p, len=%d", __FUNCTION__, skb, eth, bgnet, skb->len); + dump_skb_partial(skb, 64) ; + dest.raw = 0 ; + dest.p2p.pclass = bgnet->bgcol_route; + + if (is_broadcast_ether_addr(eth->h_dest)) { + /* May have to go to the IO node for broadcasting */ + if(0 == i_am_compute_node) + { + TRACE("broadcasting from IO node") ; + dest.bcast.tag = 0; + bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol; + } + else + { + TRACE("sending to IO node for broadcast") ; + dest.p2p.vector = 
bgnet->eth_bridge_vector; + dest.p2p.p2p = 1; + bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_reflector_protocol; + } + } else { + TRACE("bgcol_xmit_next_skb bgnet->bgcol_vector=%08x bgnet->eth_bridge_vector=%08x",bgnet->bgcol_vector,bgnet->eth_bridge_vector) ; + if (bgnet->eth_mask == 0 || + ((bgnet->eth_mask & *(unsigned int *)(ð->h_dest[0])) == + (bgnet->eth_local))) { + if(0 == i_am_compute_node) + { + TRACE("sending to compute node") ; + dest.p2p.vector = *(unsigned int *)(ð->h_dest[2]); + bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol; + } + else + { + dest.p2p.vector = bgnet->eth_bridge_vector; + if(( bgnet->eth_bridge_vector ^ (*(unsigned int *)(ð->h_dest[2]))) & 0x00ffffff) + { + TRACE("sending to IO node for reflection") ; + bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_reflector_protocol; + } + else + { + TRACE("sending to IO node as final destination") ; + bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol; + } + } + } else { + TRACE("sending to IO node for onward transmission") ; + dest.p2p.vector = bgnet->eth_bridge_vector; + bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol; + } + dest.p2p.p2p = 1; + } + + /* initialize link layer */ + bgcol->lnkhdr_xmit.dst_key = eth_to_key(eth->h_dest); + bgcol->lnkhdr_xmit.src_key = bgnet->bgcol_vector; + + /* pad out head of packet so it starts at a 16 Byte boundary */ + offset = ((unsigned long)skb->data) & 0xf; + bgcol->lnkhdr_xmit.opt.opt_net.pad_head = offset; + bgcol->lnkhdr_xmit.opt.opt_net.pad_tail = (COL_FRAGPAYLOAD - ((skb->len + offset) % COL_FRAGPAYLOAD)) % COL_FRAGPAYLOAD; + bgcol->current_xmit_data=skb->data - offset ; + bgcol->current_xmit_len=skb->len + offset ; + /* prepare link header */ + bgcol->lnkhdr_xmit.conn_id = bgcol->curr_conn++; + bgcol->lnkhdr_xmit.total_pkt = ((skb->len + offset - 1) / COL_FRAGPAYLOAD) + 1; + bgcol->lnkhdr_xmit.this_pkt = 0; + TRACE("%s: dst_key=%08x src_key=%08x lnk_proto=%d conn_id=%d total_pkt=%d pad_head=%d pad_tail=%d", __FUNCTION__, + 
bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, bgcol->lnkhdr_xmit.lnk_proto, bgcol->lnkhdr_xmit.conn_id, bgcol->lnkhdr_xmit.total_pkt, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail ); + bgcol->fragidx_xmit = 0 ; + bgcol->dest_xmit = dest ; + TRACEN(k_t_lowvol,"bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d", + bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail); + } + } + } + +/* Push packets in until we finish the skb or the fifo fills */ +/* Returns 2 if we would like to push something into the fifo but cannot because it is full */ +/* Returns 1 if we pushed something into the fifo */ +static inline int bgcol_xmit_push_packets(struct bg_col* bgcol, +/* struct bgcol_channel *chn, */ + unsigned int status_in, unsigned int mioaddr) + { + unsigned int fragidx ; + struct bgnet_dev *bgnet = bgcol->bgnet ; + union bgcol_status status; + union bgcol_header dest ; + struct sk_buff *skb = bgcol->skb_current_xmit ; + void *payloadptr = bgcol->current_xmit_data ; + int len = bgcol->current_xmit_len ; + int fullness ; + int initial_fragidx ; + double *lnkhdrxd = (double *) &(bgcol->lnkhdr_xmit) ; + + dest = bgcol->dest_xmit ; + fragidx = bgcol->fragidx_xmit ; + TRACE("bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d, fragidx=%d", + bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail, fragidx); + dump_skb_partial(skb,64) ; + if( 0 != ( ((unsigned)(payloadptr) ) & 0x0f ) ) + { + TRACEN(k_t_request, "Misaligned payloadptr=%p", payloadptr) ; + } +/* BUG_ON(0 != ( ((unsigned)(payloadptr) ) & 0x0f ) ) ; */ + if( 0 == ( ((unsigned)payloadptr) & 0x0f ) ) + { + /* Have we got space in the FIFO ? 
*/ + status.raw = status_in ; + fullness = status.x.inj_hdr ; +/* bgcol->send_fifo_histogram[fullness] += 1 ; // fullness statistics */ + TRACE("bgnet xmit: status=%08x",status.raw); + if (fullness >= COL_FIFO_SIZE ) + { + /* No room. Upper routines will retry when appropriate */ + TRACEN(k_t_irqflow,"Send FIFO full"); + TRACEN(k_t_irqflow,"bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d, fragidx=%d", + bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail, fragidx); + return 2 ; + } + /* update fragment index */ + bgcol->lnkhdr_xmit.this_pkt = fragidx; + initial_fragidx = fragidx ; +#if defined(COLLECTIVE_ONEPASS_TXRX) + if( len >= COL_FRAGPAYLOAD ) +#else + while( len >= COL_FRAGPAYLOAD && fullness < COL_FIFO_SIZE) +#endif + { + bgcol_payload_inject_load2(lnkhdrxd,lnkhdrxd+1, payloadptr) ; + dump_bgcol_packet(&bgcol->lnkhdr_xmit, payloadptr) ; + fragidx += 1 ; + bgcol->lnkhdr_xmit.this_pkt = fragidx; + *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) = dest.raw; + len -= COL_FRAGPAYLOAD; + payloadptr += COL_FRAGPAYLOAD; + fullness += 1; + while( len >= COL_FRAGPAYLOAD && fullness < COL_FIFO_SIZE) + { + /* We have full packets, and space in the fifo for them */ + TRACE("bgcol: ptr=%p, len=%d", payloadptr, len); + bgcol_payload_inject_storeload2((void*)(mioaddr + _BGP_TRx_DI),lnkhdrxd,lnkhdrxd+1, payloadptr) ; + dump_bgcol_packet(&bgcol->lnkhdr_xmit, payloadptr) ; + fragidx += 1 ; + bgcol->lnkhdr_xmit.this_pkt = fragidx; + /* write destination header */ + *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) = dest.raw ; + len -= COL_FRAGPAYLOAD; + payloadptr += COL_FRAGPAYLOAD; + fullness += 1; + } + bgcol_payload_inject_store((void*)(mioaddr + _BGP_TRx_DI)) ; +#if !defined(COLLECTIVE_ONEPASS_TXRX) + status.raw = in_be32_nosync((unsigned*)(mioaddr + _BGP_TRx_Sx)) ; + fullness = status.x.inj_hdr ; +#endif + } + bgnet->stats.tx_bytes += 
COL_FRAGPAYLOAD*(fragidx-initial_fragidx) ; + + /* Either the FIFO is full, or we are near (or at) the end of the skb-worth of data */ + /* Stuff one packet in. */ + + + if( len > 0 && fullness < COL_FIFO_SIZE ) + { + /* If the last packet doesn't cross a page boundary, we can send it with */ + /* whatever is in memory after it, and we won't get a SEGV. */ + TRACE("bgcol: ptr=%p, len=%d", payloadptr, len); + bgnet->stats.tx_bytes += len; + + /* write destination header */ +/* enable_kernel_fp() ; */ + *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) = dest.raw; +/* bgcol_payload_inject_load2partial(lnkhdrxd,lnkhdrxd+1, payloadptr,(len+15)/16) ; */ + bgcol_payload_inject_load2(lnkhdrxd,lnkhdrxd+1, payloadptr) ; + bgcol_payload_inject_store((void*)(mioaddr + _BGP_TRx_DI)) ; + + len=0 ; + } + + } + else + { + /* The packet was misaligned. This will cause the skb to be flushed and we will get a */ + /* fresh one next time. */ + len=0 ; + } + TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit); + + TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit); + /* Did we complete the skb ? 
*/ + if( 0 == len ) + { + /* Yes, we can free this one and upper layers will cue the next one */ + TRACEN(k_t_irqflow,"bgcol: finished skb=%p", skb); + bgnet->stats.tx_packets++; + dump_skb_partial(skb,64); +/* Linux seems unhappy freeing skb's in an interrupt handler */ +#if defined(COLLECTIVE_TRANSMIT_WITH_SLIH) +#if defined(COLLECTIVE_XMITTER_FREES) + skb_queue_tail(&bgcol->skb_list_free,skb) ; +#else + dev_kfree_skb(skb) ; +#endif +#else + skb_queue_tail(&bgcol->skb_list_free,skb) ; + tasklet_schedule(&bgcol_completed_buffer_tasklet) ; +#endif + bgcol->skb_current_xmit=NULL ; + } + else + { + /* No, Remember the link header for next time */ + TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit); + TRACE("bgcol: more to go for skb=%p , fragidx=%d, len=%d", skb, fragidx, skb->len); + bgcol->fragidx_xmit = fragidx ; + bgcol->current_xmit_len=len ; + bgcol->current_xmit_data=payloadptr ; + } + TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit); + return 1 ; /* Indicate that a redrive might be productive */ + } + + +/* One pass at filling the transmit FIFO. */ +/* Returns 2 if we would like to push something into the fifo but cannot because it is full */ +/* Returns 1 if we pushed something into the fifo (and we would like a redrive because we finished a frame) */ +/* Returns 0 if all the data has been put in the FIFO (and a redrive would be unproductive unless someone queues a frame for sending) */ +/* An upper layer must redrive or enable interrupts if it gets a non-zero. 
*/ +static inline int bgcol_xmit_onepass(struct bg_col *bgcol, unsigned int status_in, unsigned int mioaddr) + { +/* unsigned chnidx = bgcol->bgnet_channel ; */ + struct sk_buff *skb = bgcol->skb_current_xmit ; + if( NULL == skb) + { + struct bgnet_dev *bgnet = bgcol->bgnet ; + if( bgnet) + { + bgcol_xmit_next_skb(bgcol) ; + skb = bgcol->skb_current_xmit ; + if( NULL == skb ) + { + TRACEN(k_t_irqflow,"bgcol: no more to send"); + return 0 ; + } + } + else + { + TRACEN(k_t_irqflow,"bgcol: bgnet is not ready"); + return 0 ; + } + } + /* By this stage we should have a viable skb and a viable link header */ + return bgcol_xmit_push_packets(bgcol, + status_in, + mioaddr) ; + } + +/* 'full duplex' SLIH, receiving and sending */ +/* Number of times to spin before concluding there isn't anything on the bgcol */ +enum { + k_unproductive_receive_threshold = 10 , + k_unproductive_transmit_threshold = 10 +}; + +void bgcol_duplex_slih(unsigned long dummy) + { + struct bg_col *bgcol = __bgcol ; + struct bgcol_channel *chn = &bgcol->chn[bgcol->bgnet_channel]; + unsigned int mioaddr=chn->mioaddr ; + unsigned int status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ; + unsigned int rcr ; + unsigned int rcx ; + unsigned int productive=0 ; + unsigned int unproductive_receive_count=0 ; + unsigned int unproductive_transmit_count=0 ; + unsigned int rcrset = 0 ; + + enable_kernel_fp() ; + +#if defined(KEEP_BG_COL_STATISTICS) + bgcol->send_fifo_histogram0[(status >> 16) & 0x0f] += 1 ; + bgcol->recv_fifo_histogram0[(status ) & 0x0f] += 1 ; +#endif + for(;;) + { + TRACEN(k_t_irqflow,"status=%08x", status); + rcr = bgcol_receive_mark3(bgcol, bgcol->bgnet_channel, status, mioaddr) ; +#if defined(KEEP_BG_COL_STATISTICS) && defined(EXTRA_TUNING) + { + unsigned int extra_status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ; + bgcol->send_fifo_histogram2[(extra_status >> 16) & 0x0f] += 1 ; + bgcol->recv_fifo_histogram2[(extra_status ) & 0x0f] += 1 ; + + } +#endif + rcx = 
bgcol_xmit_onepass(bgcol, status, mioaddr) ; + TRACEN(k_t_irqflow,"rcr=0x%08x rcx=0x%08x", rcr, rcx); + status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ; +#if defined(KEEP_BG_COL_STATISTICS) + bgcol->send_fifo_histogram1[(status >> 16) & 0x0f] += 1 ; + bgcol->recv_fifo_histogram1[(status ) & 0x0f] += 1 ; +#endif + /* What we do now depends on whether the slihs were 'productive' ... */ + unproductive_receive_count = rcr ? 0 : (unproductive_receive_count+1) ; + unproductive_transmit_count = (rcx==1) ? 0 : (unproductive_transmit_count+1) ; + productive += ( 0 != rcr || 1 == rcx ) ; + rcrset = ( rcr > 0 ) ? 0 : rcrset ; + rcrset |= rcr ; + if( 0 == productive ) + { +#if defined(KEEP_BG_COL_STATISTICS) + bgcol->spurious_interrupts += 1 ; +#endif + break ; /* a spurious interrupt */ + } + if( ( unproductive_receive_count > k_unproductive_receive_threshold + || (rcrset & 0x80000000) + ) + && + ( unproductive_transmit_count > k_unproductive_transmit_threshold + || (rcx == 0 ) + ) + ) break ; /* Neither transmit not receive are likely to progress */ + } + +#if defined(CONFIG_BGP_COLLECTIVE_NAPI) + if( bgcol_diagnostic_use_napi ) + { + TRACEN(k_t_napi,"napi_complete(%p)",&(bgcol->bgnet->napi)) ; + napi_complete(&(bgcol->bgnet->napi)) ; + } +#endif + bgcol->handler_running = 0 ; + if( 0 != rcx ) + { + /* Filled the TX FIFO, need an interrupt when it has room */ + TRACEN(k_t_irqflow,"Enabling TX interrupts"); + bgcol_enable_interrupts_xmit(bgcol) ; /* Ask for an interrupt when there is space */ + } + +#if defined(HAS_MISSED_INTERRUPT_TIMER) + mod_timer(&bgcol->missed_interrupt_timer, jiffies+200) ; /* Cause timer interrupt after 2000ms if things don't stay alive ... temp while diagnosing problem ... 
*/ +#endif + bgcol_enable_interrupts_rcv(bgcol) ; + } + + +static DECLARE_TASKLET(bgcol_duplex_slih_tasklet,bgcol_duplex_slih,0); + +static irqreturn_t bgcol_duplex_interrupt(int irq, void *dev) + { + struct bg_col *bgcol = (struct bg_col*)dev; + + TRACE("bgnet: (>)interrupt %d", irq); + bgcol->handler_running = 1 ; + bgcol_disable_interrupts_xmit(bgcol) ; + bgcol_disable_interrupts_rcv(bgcol) ; + (void) mfdcrx(bgcol->dcrbase +_BGP_DCR_TR_REC_PRXF); +#if defined(CONFIG_BGP_COLLECTIVE_NAPI) + if( bgcol_diagnostic_use_napi) + { + TRACEN(k_t_napi,"napi_schedule(%p)",&bgcol->bgnet->napi) ; + napi_schedule(&bgcol->bgnet->napi) ; + } + else + { + tasklet_schedule(&bgcol_duplex_slih_tasklet); + + } +#else + tasklet_schedule(&bgcol_duplex_slih_tasklet); +#endif + TRACE("bgnet: (<)interrupt %d", irq); + return IRQ_HANDLED ; + } + + +#if defined(HAS_MISSED_INTERRUPT_TIMER) +static void bgcol_missed_interrupt(unsigned long dummy) +{ + struct bg_col *bgcol = (struct bg_col*)&static_col; + TRACEN(k_t_irqflow,"(>)") ; + + bgcol->handler_running = 1 ; + bgcol_disable_interrupts_xmit(bgcol) ; + bgcol_disable_interrupts_rcv(bgcol) ; + (void) mfdcrx(bgcol->dcrbase +_BGP_DCR_TR_REC_PRXF); + #if defined(CONFIG_BGP_COLLECTIVE_NAPI) + if( bgcol_diagnostic_use_napi) + { + TRACEN(k_t_napi,"napi_schedule(%p)",&bgcol->bgnet->napi) ; + napi_schedule(&bgcol->bgnet->napi) ; + } + else + { + tasklet_schedule(&bgcol_duplex_slih_tasklet); + + } + #else + tasklet_schedule(&bgcol_duplex_slih_tasklet); + #endif + mod_timer(&bgcol->missed_interrupt_timer, jiffies+10) ; /* Cause timer interrupt after 100ms if things don't stay alive ... temp while diagnosing problem ... 
*/ + TRACEN(k_t_irqflow,"(<)") ; +} +#endif +int col_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct bg_col *bgcol=__bgcol ; + TRACEN(k_t_irqflow|k_t_startxmit,"%s: Enq skb=%p, dev=%p, len=%d", __FUNCTION__, skb, dev, skb->len); +#if defined(COLLECTIVE_TRANSMIT_WITH_SLIH) + skb_queue_tail(&(bgcol->skb_list_xmit),skb) ; +#if defined(COLLECTIVE_TRANSMIT_WITH_FLIH) + if( ! bgcol->handler_running) + { + TRACEN(k_t_irqflow,"Enabling TX interrupts"); + bgcol_enable_interrupts_xmit(bgcol) ; /* Ask for an interrupt when there is space */ + } +#else + tasklet_schedule(&bgcol_duplex_slih_tasklet); +#endif +#else + { + unsigned int flags ; + + dump_skb_partial(skb,64) ; + spin_lock_irqsave(&bgcol->irq_lock_xmit, flags) ; + { + struct sk_buff *xskb = bgcol->skb_current_xmit ; + if( NULL == xskb && skb_queue_empty(&(bgcol->skb_list_xmit))) + { + int rc ; + TRACEN(k_t_irqflow,"%s: Enq+en skb=%p, len=%d", __FUNCTION__, skb, skb->len); + skb_queue_tail(&(bgcol->skb_list_xmit),skb) ; + enable_kernel_fp(); + rc = bgcol_xmit_handle(bgcol) ; + if( 0 == rc ) + { + /* No room in fifo */ + TRACEN(k_t_irqflow,"Enabling TX interrupts"); + bgcol_enable_interrupts_xmit(bgcol) ; /* Ask for an interrupt when there is space */ + } + } + else + { + TRACEN(k_t_irqflow,"%s: Enq skb=%p, dev=%p, len=%d", __FUNCTION__, skb, dev, skb->len); + skb_queue_tail(&(bgcol->skb_list_xmit),skb) ; + } + } + spin_unlock_irqrestore(&bgcol->irq_lock_xmit, flags); + } +#endif +/* } */ +#if defined(COLLECTIVE_XMITTER_FREES) + { + struct sk_buff *skb = skb_dequeue(&(bgcol->skb_list_free) ) ; + while(skb) + { + TRACEN(k_t_irqflow,"Freeing sent skb=%p",skb); + dev_kfree_skb(skb) ; + skb = skb_dequeue(&(bgcol->skb_list_free) ) ; + } + + } +#endif + return 0 ; +} + + +/* static int bgpnet_add_device(int major, */ +/* int minor, */ +/* const char* devname, */ +/* unsigned long long physaddr, */ +/* int irq, */ +/* irqreturn_t (*irq_handler)(int, void *)) */ +/* { */ +/* int ret; */ +/* dev_t devno; */ 
+/* struct bgpnet_dev* dev = &bgpnet_devices[bgpnet_num_devices]; */ +/* */ +// /* initilize struct */ +/* init_MUTEX (&dev->sem); */ +/* dev->major = major; */ +/* dev->minor = minor; */ +/* dev->physaddr = physaddr; */ +/* init_waitqueue_head(&dev->read_wq); */ +/* dev->read_complete = 0; */ +/* if (physaddr) { */ +/* dev->regs = ioremap(physaddr, 4096); */ +/* } */ +/* devno=MKDEV(major,minor); */ +/* */ +// /* register i.e., /proc/devices */ +/* ret=register_chrdev_region(devno,1,(char *)devname); */ +/* */ +/* if (ret) */ +/* { */ +/* printk (KERN_WARNING "bgpnet: couldn't register device (%d,%d) register_chrdev_region err=%d\n", */ +/* major,minor,ret); */ +/* return ret; */ +/* } */ +/* */ +// /* add cdev */ +/* cdev_init(&dev->cdev,&bgpnet_device_fops); */ +/* dev->cdev.owner=THIS_MODULE; */ +/* dev->cdev.ops=&bgpnet_device_fops; */ +/* ret=cdev_add(&dev->cdev,devno,1); */ +/* if (ret) */ +/* { */ +/* printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d) cdev_add err=%d\n", */ +/* major,minor,ret); */ +/* return ret; */ +/* } */ +/* */ +// /* signul to pass to owning process, should be altered using ioctl */ +/* dev->signum=-1; */ +/* */ +/* bgpnet_num_devices++; */ +/* */ +/* return 0; */ +/* } */ + +/* static int bgpnet_device_open (struct inode *inode, struct file *filp) */ +/* { */ +/* struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev); */ +/* */ +/* if(down_interruptible(&dev->sem)) return -ERESTARTSYS; */ +/* up(&dev->sem); */ +/* */ +/* dev->current=current; */ +/* filp->private_data = (void*) dev; */ +/* */ +/* TRACE("bgpnet: device (%d,%d) opened by process \"%s\" pid %i", */ +/* MAJOR(inode->i_rdev), MINOR(inode->i_rdev), current->comm, current->pid); */ +/* */ +/* return 0; */ +/* } */ + + +/* + * Read doesn't actually read anything. It simply blocks if the fifo is empty. 
+ */ +/* static ssize_t bgpnet_device_read(struct file *filp, char __user *buf, size_t count, */ +/* loff_t *f_pos) */ +/* { */ +/* struct bgpnet_dev* dev = (struct bgpnet_dev *)filp->private_data; */ +/* union bgcol_status status; */ +/* int chn = dev->minor; */ +/* */ +/* if (dev->major == BGP_COL_MAJOR_NUM && (chn == 0 || chn == 1)) { */ +/* status.raw = in_be32((unsigned *)((char*)dev->regs + _BGP_TRx_Sx)); */ +/* if (!status.x.rcv_hdr) { */ +/* TRACE("bgpnet: read found status not ready status=0x%08x", status.raw); */ +// /* enable interrupt when packets come in. */ +/* bgcol_enable_rcv_wm_interrupt(&__bgcol->chn[chn]); */ +/* wait_event_interruptible(dev->read_wq, dev->read_complete); */ +/* dev->read_complete = 0; */ +/* TRACE("bgpnet: read wakes up"); */ +/* } */ +// /* Ok if we give a false positive -- we tried. +/* * Note that we never actually copy out some data. The status might be a useful */ +/* * thing to write in the buffer, but the caller only cares to block until */ +/* * something is there. */ +// */ +/* } */ +/* */ +/* return 0; */ +/* } */ + + +/* Don't think this will work on the 'bgnet' channel. What is the intent ? CIOD ? */ +/* If for CIOD, it may have suffered in the 'revised interrupt handler' integrataion */ +/* + * Note that poll only waits for data to be available in the read fifo. + * We do this by enabling an interrupt while we wait. The interrupt is disabled + * when it fires. The poll may complete before it fires (timeout), but that is ok. + */ +/* static unsigned int bgpnet_device_poll(struct file *filp, poll_table * wait) */ +/* { */ +/* struct bgpnet_dev* dev = (struct bgpnet_dev*) filp->private_data; */ +/* unsigned int rc; */ +/* union bgcol_status status; */ +/* unsigned int chn = dev->minor; */ +/* */ +/* if (dev->major == BGP_COL_MAJOR_NUM && (chn == 0 || chn == 1)) { */ +/* poll_wait(filp, &dev->read_wq, wait); */ +/* */ +// /* Return current col status. 
*/ +// rc = POLLOUT|POLLWRNORM; /* For now implement read poll only */ +/* status.raw = in_be32((unsigned *)((char*)dev->regs + _BGP_TRx_Sx)); */ +/* if (status.x.rcv_hdr) { */ +/* TRACE("bgpnet: poll found status ready status=0x%08x", status.raw); */ +// /* got something already */ +/* rc |= POLLIN|POLLRDNORM; */ +/* } else { */ +/* TRACE("bgpnet: poll found status not ready status=0x%08x", status.raw); */ +// /* enable interrupt when packets come in. */ +/* mtdcrx(_BGP_DCR_TR_REC_PRXEN, (chn ? _TR_REC_PRX_WM1 : _TR_REC_PRX_WM0)); */ +/* } */ +/* } else */ +/* rc = POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM; */ +/* */ +/* return rc; */ +/* } */ + + +/* static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma) */ +/* { */ +/* unsigned long vsize = vma->vm_end - vma->vm_start; */ +/* struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data; */ +/* int ret = -1; */ +/* */ +// /* ------------------------------------------------------- */ +// /* set up page protection. */ +// /* ------------------------------------------------------- */ +/* */ +/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */ +/* vma->vm_flags |= VM_IO; */ +/* vma->vm_flags |= VM_RESERVED; */ +/* */ +// /* ------------------------------------------------------- */ +// /* do the mapping */ +// /* ------------------------------------------------------- */ +/* */ +/* if (device->physaddr != 0) */ +/* ret = remap_pfn_range(vma, */ +/* vma->vm_start, */ +/* device->physaddr >> PAGE_SHIFT, */ +/* vsize, */ +/* vma->vm_page_prot); */ +/* */ +/* if (ret) { */ +/* printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n", */ +/* device->major, device->minor); */ +/* } else { */ +/* TRACE("bgpnet: mapped (%d,%d) to vm=%lx", */ +/* device->major, device->minor, vma->vm_start); */ +/* } */ +/* return ret? 
-EAGAIN :0; */ +/* } */ + +/* ************************************************************************* */ +/* BG/P network: release device */ +/* ************************************************************************* */ + +/* static int bgpnet_device_release (struct inode *inode, struct file * filp) */ +/* { */ +/* struct bgpnet_dev *dev=(struct bgpnet_dev *)filp->private_data; */ +/* */ +// /*Ensure exclusive access*/ +/* if(down_interruptible(&dev->sem)) return -ERESTARTSYS; */ +/* */ +/* dev->current = NULL; */ +/* up(&dev->sem); */ +/* */ +/* TRACE("bgpnet: device (%d,%d) successfully released", */ +/* MAJOR(inode->i_rdev), MINOR(inode->i_rdev)); */ +/* return 0; */ +/* } */ + + +/* static int bgpnet_device_ioctl (struct inode *inode, */ +/* struct file * filp, */ +/* unsigned int cmd, */ +/* unsigned long arg) */ +/* { */ +/* return 0; */ +/* } */ + + + +/* Base 10 is assumed. Hexadecimal numbers must begin with 0x or 0X (ie. 0xabadcafe). */ +/* Binary numbers must begin with the letter b in lowercase (ie. b01101001). */ +#define LOWER(c) ((c) < 'a' ? (c) + ('a' - 'A') : (c)) +static inline unsigned long atol(char *str) +{ + unsigned long value = 0; + unsigned char base = 10; + + if ((*str == '0') && (LOWER(*(str+1)) == 'x')) { + base = 16; /* hexadecimal */ + str += 2; + } else if (*str == 'b') { + base = 2; /* binary */ + str++; + } + + for (; *str; str++) { + unsigned char digit = (*str > '9') ? 
(10 + LOWER(*str) - 'a') : (*str - '0'); + if (digit >= base) { + value = 0; + break; + } + value = value * base + digit; + } + + return value; +} + + + + + +/********************************************************************** + * Initialization and shut-down + **********************************************************************/ + +static inline void bgcol_reset_channel(struct bgcol_channel *chn) +{ + mtdcrx(chn->dcrbase + _BGP_DCR_TR_RCTRL, _TR_RCTRL_RST); + mtdcrx(chn->dcrbase + _BGP_DCR_TR_SCTRL, _TR_RCTRL_RST); +} + + +static int bgcol_init_channel(unsigned long idx, struct bg_col *col) +{ + struct bgcol_channel* chn = &col->chn[idx]; + int i; + + chn->paddr = COL_CHANNEL_PADDR(idx); + chn->dcrbase = col->dcrbase + COL_CHANNEL_DCROFF(idx); + chn->irq_rcv_pending_mask = COL_IRQ_RCV_PENDING_MASK(idx); + chn->irq_inj_pending_mask = COL_IRQ_INJ_PENDING_MASK(idx); + init_timer(&chn->inj_timer); + chn->inj_timer.function = inj_timeout; + chn->inj_timer.data = (unsigned long) col; + chn->inj_timer.expires = 0; + for (i = 0; i < BGP_MAX_DEVICES; i++) + if (bgpnet_devices[i].major == BGP_COL_MAJOR_NUM && + bgpnet_devices[i].minor == idx) { + chn->chrdev = &bgpnet_devices[i]; + break; + } + if (i >= BGP_MAX_DEVICES) + chn->chrdev = NULL; + chn->col = col; + chn->idx = idx; + + if (!request_mem_region(chn->paddr, _BGP_COL_SIZE, COL_DEV_NAME)) + return -1; + + chn->mioaddr = (unsigned long)ioremap(chn->paddr, _BGP_COL_SIZE); + if (!chn->mioaddr) + goto err_remap; + + if (chn) + mtdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_VCFG1, + _TR_GLOB_VCFG_RWM(0) | _TR_GLOB_VCFG_IWM(4)); + else + mtdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_VCFG0, + _TR_GLOB_VCFG_RWM(0) | _TR_GLOB_VCFG_IWM(4)); + mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, COL_IRQMASK_REC); + mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_INJ_PIXEN, COL_IRQMASK_INJ); + + return 0; + + err_remap: + printk("error mapping col\n"); + release_mem_region(chn->mioaddr, _BGP_COL_SIZE); + + return -1; +} + +static int 
bgcol_uninit_channel(struct bgcol_channel *chn, + struct bg_col *col) +{ + if (chn->mioaddr) + { + iounmap((void*)chn->mioaddr); + chn->mioaddr = 0; + + /* unconditionally... */ + release_mem_region(chn->paddr, _BGP_COL_SIZE); + } + return 0; +} + +static int bgcol_init (struct bg_col *col) +{ + int cidx, rc, idx; + +/* skb_queue_head_init(&skb_delivery_queue) ; */ + if( 0 == col->mtu) + { + bgcol_set_mtu(col,60960+sizeof(struct ethhdr) ) ; /* It's possible that the 'bgnet' might have won a race to set MTU ... */ + } + col->skb_in_waiting = alloc_skb( + k_use_plentiful_skb ? k_plentiful_skb_size : col->mtu + , GFP_KERNEL ); + col->skb_mini = alloc_skb(BGNET_FRAG_MTU + COL_SKB_ALIGN , GFP_KERNEL ) ; + + spin_lock_init(&col->lock); + spin_lock_init(&col->irq_lock); + + skb_queue_head_init(&col->skb_list_for_filling) ; + skb_queue_head_init(&col->skb_list_for_delivering) ; + skb_queue_head_init(&col->skb_list_for_freeing) ; + + bgcol_prefill(&col->skb_list_for_filling, 100) ; + + + col->dcrbase = COL_DCR_BASE; + + skb_queue_head_init(&col->skb_list_xmit) ; + skb_queue_head_init(&col->skb_list_free) ; + col->skb_current_xmit = NULL ; + + skb_queue_head_init(&col->fragskb_list_rcv) ; + init_ethkey_table(col) ; + + /* abuse IO port structure for DCRs */ + if (!request_region(col->dcrbase, COL_DCR_SIZE, COL_DEV_NAME)) + return -1; + + /* disable device IRQs before we attach them */ + bgcol_disable_interrupts(col); + +#if defined(HAS_MISSED_INTERRUPT_TIMER) + setup_timer(&col->missed_interrupt_timer,bgcol_missed_interrupt,0) ; +#endif + col->nodeid = mfdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_NADDR); + + for (cidx = 0; cidx < BGP_MAX_CHANNEL; cidx++) { + if (bgcol_init_channel(cidx, col) != 0) + goto err_channel; + } + + /* clear exception flags */ + mfdcrx(col->dcrbase + _BGP_DCR_TR_INJ_PIXF); + mfdcrx(col->dcrbase + _BGP_DCR_TR_REC_PRXF); + + /* allocate IRQs last; otherwise, if an IRQ is still pending, we */ + /* get kernel segfaults */ + for (idx = 0; bgcol_irqs[idx].irq 
!= -1; idx++) + { +#if defined(COLLECTIVE_TREE_AFFINITY) + bic_set_cpu_for_irq(bgcol_irqs[idx].irq,k_TreeAffinityCPU) ; + TRACEN(k_t_general,"setting affinity irq=%d affinity=%d",bgcol_irqs[idx].irq, k_TreeAffinityCPU ); +#endif + rc = request_irq(bgcol_irqs[idx].irq, bgcol_irqs[idx].handler, + IRQF_DISABLED, bgcol_irqs[idx].name, col); + if (rc) + goto err_irq_alloc; + } + + + return 0; + + err_irq_alloc: + for (idx = 0; bgcol_irqs[idx].irq != -1; idx++) + free_irq(bgcol_irqs[idx].irq, col); + + err_channel: + for (cidx = 0; cidx < BGP_MAX_CHANNEL; cidx++) + bgcol_uninit_channel(&col->chn[cidx], col); + + release_region(col->dcrbase, COL_DCR_SIZE); + + return -1; +} + +/********************************************************************** + * /proc filesystem + **********************************************************************/ + +#define TGREAD(r, d) \ + rc = snprintf(page, remaining, "%.30s (%03x): %08x\n", d, \ + bgcol->dcrbase + r, mfdcrx(bgcol->dcrbase + r)); \ + if (rc < 0) goto out; \ + if (rc > remaining) { remaining = 0; goto out; } \ + page += rc; \ + remaining -= rc; + +#define TGSHOW(r) \ + rc = snprintf(page, remaining, "%.60s : %08x\n", #r, (unsigned int)(r) );\ + if (rc < 0) goto out; \ + if (rc > remaining) { remaining = 0; goto out; } \ + page += rc; \ + remaining -= rc; + + +static int bgpnet_statistics_read (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct bg_col *bgcol = data; + int rc, remaining = count; + *eof = 1; + TGREAD(_BGP_DCR_TR_REC_PRXEN, "Receive Exception Enable"); + TGREAD(_BGP_DCR_TR_REC_PRXF, "Receive Exception Flag "); + TGREAD(_BGP_DCR_TR_INJ_PIXEN, "Injection Exception Enable"); + TGREAD(_BGP_DCR_TR_INJ_PIXF, "Injection Exception Flag "); + + TGSHOW(*((unsigned*)(bgcol->chn[0].mioaddr + _BGP_TRx_Sx))) ; + TGSHOW(bgcol->curr_conn) ; +#if !defined(COLLECTIVE_TRANSMIT_WITH_SLIH) + TGSHOW(spin_is_locked(&bgcol->irq_lock_xmit)) ; +#endif + TGSHOW(skb_queue_len(&bgcol->skb_list_xmit)) ; + 
TGSHOW(skb_queue_len(&bgcol->skb_list_free)) ; + TGSHOW(skb_queue_len(&bgcol->fragskb_list_rcv)) ; + TGSHOW(bgcol->skb_current_xmit) ; + TGSHOW(bgcol->current_xmit_len) ; + TGSHOW(bgcol->fragidx_xmit) ; + TGSHOW(bgcol->recv_total) ; + TGSHOW(bgcol->recv_guess_miss) ; + TGSHOW(bgcol->recv_no_skbuff) ; + TGSHOW(bgcol->recv_no_first_packet) ; + TGSHOW(bgcol->spurious_interrupts) ; + TGSHOW(irq_desc[BG_COL_IRQ_INJ].status) ; + TGSHOW(irq_desc[BG_COL_IRQ_INJ].irq_count) ; + TGSHOW(irq_desc[BG_COL_IRQ_INJ].irqs_unhandled) ; + TGSHOW(irq_desc[BG_COL_IRQ_RCV].status) ; + TGSHOW(irq_desc[BG_COL_IRQ_RCV].irq_count) ; + TGSHOW(irq_desc[BG_COL_IRQ_RCV].irqs_unhandled) ; + + out: + + return count - remaining; +} + +static int bgpnet_status_read (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct bg_col *bgcol = data; + int rc, remaining = count; + *eof = 1; + + + TGREAD(_BGP_DCR_TR_GLOB_FPTR, "Fifo Pointer"); + TGREAD(_BGP_DCR_TR_GLOB_NADDR, "Node Address"); + TGREAD(_BGP_DCR_TR_GLOB_VCFG0, "VC0 Configuration"); + TGREAD(_BGP_DCR_TR_GLOB_VCFG1, "VC1 Configuration"); + TGREAD(_BGP_DCR_TR_REC_PRXEN, "Receive Exception Enable"); + TGREAD(_BGP_DCR_TR_REC_PRXF, "Receive Exception Flag "); + TGREAD(_BGP_DCR_TR_REC_PRDA, "Receive Diagnostic Address"); + TGREAD(_BGP_DCR_TR_REC_PRDD, "Receive Diagnostic Data"); + TGREAD(_BGP_DCR_TR_INJ_PIXEN, "Injection Exception Enable"); + TGREAD(_BGP_DCR_TR_INJ_PIXF, "Injection Exception Flag "); + TGREAD(_BGP_DCR_TR_INJ_PIDA, "Injection Diagnostic Address"); + TGREAD(_BGP_DCR_TR_INJ_PIDD, "Injection Diagnostic Data"); + TGREAD(_BGP_DCR_TR_INJ_CSPY0, "VC0 payload checksum"); + TGREAD(_BGP_DCR_TR_INJ_CSHD0, "VC0 header checksum"); + TGREAD(_BGP_DCR_TR_INJ_CSPY1, "VC1 payload checksum"); + TGREAD(_BGP_DCR_TR_INJ_CSHD1, "VC1 header checksum"); + + TGREAD(_BGP_DCR_TR_CLASS_RDR0, "Route Desc 0, 1"); + TGREAD(_BGP_DCR_TR_CLASS_RDR1, "Route Desc 2, 3"); + TGREAD(_BGP_DCR_TR_CLASS_RDR2, "Route Desc 4, 5"); + 
TGREAD(_BGP_DCR_TR_CLASS_RDR3, "Route Desc 6, 7"); + TGREAD(_BGP_DCR_TR_CLASS_RDR4, "Route Desc 8, 9"); + TGREAD(_BGP_DCR_TR_CLASS_RDR5, "Route Desc 10, 11"); + TGREAD(_BGP_DCR_TR_CLASS_RDR6, "Route Desc 12, 13"); + TGREAD(_BGP_DCR_TR_CLASS_RDR7, "Route Desc 14, 15"); + TGREAD(_BGP_DCR_TR_CLASS_ISRA, "Idle pattern low"); + TGREAD(_BGP_DCR_TR_CLASS_ISRB, "Idle pattern high"); + + TGREAD(_BGP_DCR_TR_DMA_DMAA, "SRAM diagnostic addr"); + TGREAD(_BGP_DCR_TR_DMA_DMAD, "SRAM diagnostic data"); + TGREAD(_BGP_DCR_TR_DMA_DMADI, "SRAM diagnostic data inc"); + TGREAD(_BGP_DCR_TR_DMA_DMAH, "SRAM diagnostic header"); + + TGREAD(_BGP_DCR_TR_ERR_R0_CRC, "CH0: Receiver link CRC errors"); + TGREAD(_BGP_DCR_TR_ERR_R0_CE, "CH0: Receiver SRAM errors corrected"); + TGREAD(_BGP_DCR_TR_ERR_S0_RETRY, "CH0: Sender link retransmissions"); + TGREAD(_BGP_DCR_TR_ERR_S0_CE, "CH0: Sender SRAM errors corrected"); + + TGREAD(_BGP_DCR_TR_ERR_R1_CRC, "CH1: Receiver link CRC errors"); + TGREAD(_BGP_DCR_TR_ERR_R1_CE, "CH1: Receiver SRAM errors corrected"); + TGREAD(_BGP_DCR_TR_ERR_S1_RETRY, "CH1: Sender link retransmissions"); + TGREAD(_BGP_DCR_TR_ERR_S1_CE, "CH1: Sender SRAM errors corrected"); + + TGREAD(_BGP_DCR_TR_ERR_R2_CRC, "CH2: Receiver link CRC errors"); + TGREAD(_BGP_DCR_TR_ERR_R2_CE, "CH2: Receiver SRAM errors corrected"); + TGREAD(_BGP_DCR_TR_ERR_S2_RETRY, "CH2: Sender link retransmissions"); + TGREAD(_BGP_DCR_TR_ERR_S2_CE, "CH2: Sender SRAM errors corrected"); + + TGREAD(_BGP_DCR_TR_ARB_RCFG, "ARB: General router config"); + TGREAD(_BGP_DCR_TR_ARB_RSTAT, "ARB: General router status"); + TGREAD(_BGP_DCR_TR_ARB_HD00, "ARB: Next hdr, CH0, VC0"); + TGREAD(_BGP_DCR_TR_ARB_HD01, "ARB: Next hdr, CH0, VC1"); + TGREAD(_BGP_DCR_TR_ARB_HD10, "ARB: Next hdr, CH1, VC0"); + TGREAD(_BGP_DCR_TR_ARB_HD11, "ARB: Next hdr, CH1, VC1"); + TGREAD(_BGP_DCR_TR_ARB_HD20, "ARB: Next hdr, CH2, VC0"); + TGREAD(_BGP_DCR_TR_ARB_HD21, "ARB: Next hdr, CH2, VC1"); + + rc = snprintf(page, remaining, "CH0: status=%08x\n", + 
in_be32((unsigned*)(bgcol->chn[0].mioaddr + _BGP_TRx_Sx))); + if (rc < 0) goto out; + if (rc > remaining) { remaining = 0; goto out; } + page += rc; remaining -= rc; + + rc = snprintf(page, remaining, "CH1: status=%08x\n", + in_be32((unsigned*)(bgcol->chn[1].mioaddr + _BGP_TRx_Sx))); + if (rc < 0) goto out; + if (rc > remaining) { remaining = 0; goto out; } + page += rc; remaining -= rc; + + rc = snprintf(page, remaining, "Data placement total=%d guess wrong=%d\n", + bgcol->recv_total, bgcol->recv_guess_miss) ; + if (rc < 0) goto out; + if (rc > remaining) { remaining = 0; goto out; } + page += rc; remaining -= rc; + rc = snprintf(page,remaining, "Receive no_skbuff=%d no_first_packet=%d\n", + bgcol->recv_no_skbuff, bgcol->recv_no_first_packet) ; + if (rc < 0) goto out; + if (rc > remaining) { remaining = 0; goto out; } + page += rc; remaining -= rc; + +#if defined(KEEP_BG_COL_STATISTICS) + { +/* int x ; */ +/* for( x=0; x<=COL_FIFO_SIZE;x+=1) */ +/* { */ +/* rc = snprintf(page, remaining, "sf_h0[%d]=%d\n", x, bgcol->send_fifo_histogram0[x]) ; */ +/* if (rc < 0) goto out; */ +/* if (rc > remaining) { remaining = 0; goto out; } */ +/* page += rc; remaining -= rc; */ +/* } */ +/* for( x=0; x<=COL_FIFO_SIZE;x+=1) */ +/* { */ +/* rc = snprintf(page, remaining, "sf_h1[%d]=%d\n", x, bgcol->send_fifo_histogram1[x]) ; */ +/* if (rc < 0) goto out; */ +/* if (rc > remaining) { remaining = 0; goto out; } */ +/* page += rc; remaining -= rc; */ +/* } */ +/* for( x=0; x<=COL_FIFO_SIZE;x+=1) */ +/* { */ +/* rc = snprintf(page, remaining, "rf_h0[%d]=%d\n", x, bgcol->recv_fifo_histogram0[x]) ; */ +/* if (rc < 0) goto out; */ +/* if (rc > remaining) { remaining = 0; goto out; } */ +/* page += rc; remaining -= rc; */ +/* } */ +/* for( x=0; x<=COL_FIFO_SIZE;x+=1) */ +/* { */ +/* rc = snprintf(page, remaining, "rf_h1[%d]=%d\n", x, bgcol->recv_fifo_histogram1[x]) ; */ +/* if (rc < 0) goto out; */ +/* if (rc > remaining) { remaining = 0; goto out; } */ +/* page += rc; remaining -= rc; */ 
+/* } */ + rc=snprintf(page, remaining, "spurious interrupts=%d\n", bgcol->spurious_interrupts) ; + if (rc < 0) goto out; + if (rc > remaining) { remaining = 0; goto out; } + page += rc; remaining -= rc; + } +#endif + + out: + + return count - remaining; +} + + +static int bgcol_proc_write(struct file *filp, const char __user *buff, unsigned long len, void *data) + { + char proc_write_buffer[256] ; + unsigned long actual_len=(len<255) ? len : 255 ; + int rc = copy_from_user( proc_write_buffer, buff, actual_len ) ; + if( rc != 0 ) return -EFAULT ; + proc_write_buffer[actual_len] = 0 ; + return actual_len ; + } + +/* static unsigned char xtable[256] = */ +/* { */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */ +/* }; */ +/* */ +/* static int bgcol_atoix(const unsigned char *cp) */ +/* { */ +/* int result = 0 ; */ +/* unsigned char ecp = xtable[*cp] ; */ +/* while (ecp < 0x10) */ +/* { */ +/* result = (result << 4 ) | ecp ; */ +/* cp += 1 ; */ +/* ecp = xtable[*cp] ; */ +/* } */ +/* return result ; */ +/* } */ + +static int dcrcopy ; +static int proc_docoldcr(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + int rc ; + TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ; + + dcrcopy=mfdcrx((unsigned int)(ctl->extra1)) ; + rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ; + TRACE("(<)") ; + return rc ; + } + +static int proc_docolmio_0(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + int rc ; + TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ; + ctl->data=(unsigned*)(static_col.chn[0].mioaddr + (unsigned int)(ctl->extra1)) ; + rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ; + TRACE("(<)") ; + return rc ; + } + +static int proc_docolmio_1(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + int rc ; + TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ; + ctl->data=(unsigned*)(static_col.chn[1].mioaddr + (unsigned int)(ctl->extra1)) ; + rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ; + TRACE("(<)") ; + return rc ; + } + +static struct ctl_path bgp_col_ctl_path[] = { + { .procname = "bgp", .ctl_name = 0, }, + { .procname = "collective", .ctl_name = 0, }, + { }, +}; + +#define CTL_PARAM_ADDR(Name,Addr) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ 
+ .data = (int *)Addr, \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_dointvec \ + } + +#define CTL_PARAM_MIO_0(Name,Offset) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_docolmio_0, \ + .extra1 = (void *)Offset \ + } + +#define CTL_PARAM_MIO_1(Name,Offset) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_docolmio_1, \ + .extra1 = (void *)Offset \ + } + +#define CTL_PARAM_COLDCR(Name,DCRNumber) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .data = &dcrcopy , \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_docoldcr , \ + .extra1 = (void *) DCRNumber \ + } + +static unsigned int static_pagesize = 1 << PAGE_SHIFT ; + +static struct ctl_table bgp_col_ctl_table[] = { +/* CTL_PARAM_ADDR("napi",&bgcol_diagnostic_use_napi) , */ + CTL_PARAM_ADDR("pagesize",&static_pagesize) , + CTL_PARAM_ADDR("tracemask",&bgcol_debug_tracemask) , +/* CTL_PARAM_ADDR("e10000_diag_count",&e10000_diag_count) , */ + CTL_PARAM_COLDCR("Receive-Exception-Enable",_BGP_DCR_TR_REC_PRXEN), + CTL_PARAM_COLDCR("Receive-Exception-Flag",_BGP_DCR_TR_REC_PRXF), + CTL_PARAM_COLDCR("Injection-Exception-Enable",_BGP_DCR_TR_INJ_PIXEN), + CTL_PARAM_COLDCR("Injection-Exception-Flag ",_BGP_DCR_TR_INJ_PIXF), + CTL_PARAM_MIO_0("BGP_TR0_S0",_BGP_TRx_Sx) , + CTL_PARAM_MIO_1("BGP_TR1_S1",_BGP_TRx_Sx) , + CTL_PARAM_ADDR("curr_conn",&static_col.curr_conn) , + CTL_PARAM_ADDR("current_xmit_len",&static_col.current_xmit_len) , + CTL_PARAM_ADDR("fragidx_xmit",&static_col.fragidx_xmit) , + CTL_PARAM_ADDR("recv_total",&static_col.recv_total) , + CTL_PARAM_ADDR("recv_guess_miss",&static_col.recv_guess_miss) , + CTL_PARAM_ADDR("recv_no_skbuff",&static_col.recv_no_skbuff) , + CTL_PARAM_ADDR("recv_no_first_packet",&static_col.recv_no_first_packet) , + 
CTL_PARAM_ADDR("deliver_without_workqueue",&static_col.deliver_without_workqueue) , +#if defined(KEEP_BG_COL_STATISTICS) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sf_h0" , + .data = static_col.send_fifo_histogram0, + .maxlen = COL_FIFO_SIZE*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sf_h1" , + .data = static_col.send_fifo_histogram1, + .maxlen = COL_FIFO_SIZE*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rf_h0" , + .data = static_col.recv_fifo_histogram0, + .maxlen = COL_FIFO_SIZE*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rf_h1" , + .data = static_col.recv_fifo_histogram1, + .maxlen = COL_FIFO_SIZE*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , +#if defined(EXTRA_TUNING) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sf_h2" , + .data = static_col.send_fifo_histogram2, + .maxlen = COL_FIFO_SIZE*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rf_h2" , + .data = static_col.recv_fifo_histogram2, + .maxlen = COL_FIFO_SIZE*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + +#endif +#endif + { 0 } + + +} ; + +static void register_collective_sysctl(struct bg_col *col) +{ + col->sysctl_table_header=register_sysctl_paths(bgp_col_ctl_path,bgp_col_ctl_table) ; + TRACEN(k_t_init, "sysctl_table_header=%p",col->sysctl_table_header) ; + +} + +int __init +bgcol_module_init(void) +{ + struct bg_col *col = &static_col ; + int rc; + unsigned long long tr0, tr1, ts0, ts1; + + register_collective_sysctl(&static_col) ; + + tr0=((unsigned long long)_BGP_UA_COL0<<32) + _BGP_PA_COL0; + tr1=((unsigned long long)_BGP_UA_COL1<<32) + _BGP_PA_COL1; + ts0=((unsigned long long)_BGP_UA_TORUS0<<32) + _BGP_PA_TORUS0; + ts1=((unsigned long long)_BGP_UA_TORUS1<<32) + 
_BGP_PA_TORUS1; + +#if defined(KEEP_BG_COL_STATISTICS) || defined(BGP_COL_STATUS_VISIBILITY) + bgpnetDir = proc_mkdir("bgpcol", NULL); + if (bgpnetDir) { +#if defined(KEEP_BG_COL_STATISTICS) + statisticsEntry = create_proc_entry("statistics", S_IRUGO, bgpnetDir); + if (statisticsEntry) { + statisticsEntry->nlink = 1; + statisticsEntry->read_proc = (void*) bgpnet_statistics_read; + statisticsEntry->write_proc = (void*) bgcol_proc_write; + statisticsEntry->data = col ; + } +#endif +#if defined(BGP_COL_STATUS_VISIBILITY) + statusEntry = create_proc_entry("status", S_IRUGO, bgpnetDir); + if (statusEntry) { + statusEntry->nlink = 1; + statusEntry->read_proc = (void*) bgpnet_status_read; + statusEntry->write_proc = (void*) bgcol_proc_write; + statusEntry->data = col ; + } +#endif +/* #if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) */ +/* tracemaskEntry = create_proc_entry("tracemask", S_IRUGO, bgpnetDir); */ +/* if (tracemaskEntry) { */ +/* tracemaskEntry->nlink = 1; */ +/* tracemaskEntry->read_proc = (void*) bgpnet_tracemask_read; */ +/* tracemaskEntry->write_proc = (void*) bgpnet_tracemask_write; */ +/* } */ +/* #endif */ + } +#endif + + rc = bgcol_init(col); + if (rc) + goto err_col_init; + + mb(); + + + return 0; + + err_col_init: + /* XXX: unmap IRQs */ + return rc; +} diff --git a/drivers/net/bgp_collective/bgcol.h b/drivers/net/bgp_collective/bgcol.h new file mode 100644 index 00000000000000..1ea2b7638cb7ac --- /dev/null +++ b/drivers/net/bgp_collective/bgcol.h @@ -0,0 +1,285 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Authors: Chris Ward <tjcw@uk.ibm.com> + * Volkmar Uhlig <vuhlig@us.ibm.com> + * Andrew Tauferner <ataufer@us.ibm.com> + * + * Description: Header file for col device + * + * + ********************************************************************/ +#ifndef __DRIVERS__NET__BLUEGENE__COL_H__ +#define __DRIVERS__NET__BLUEGENE__COL_H__ + +#define KEEP_BG_COL_STATISTICS +#define EXTRA_TUNING +/* #define KEEP_RECV_TOTAL */ +#define HAS_MISSED_INTERRUPT_TIMER + +#define _BGP_COL_BASE (0x610000000ULL) +#define _BGP_COL_OFFSET (0x001000000ULL) +#define _BGP_COL_SIZE (0x400) + +#define _BGP_TORUS_BASE (0x601140000ULL) +#define _BGP_TORUS_OFFSET (0x000010000ULL) + +#define BGP_MAX_CHANNEL 2 +#define BGP_COL_CHANNEL 0 +#define BGP_COL_ADDR_BITS 24 + +#define COL_CHANNEL_PADDR(c) (_BGP_COL_BASE + ((c)*_BGP_COL_OFFSET)) +#define COL_CHANNEL_DCROFF(c) (0x20 + ((c) * 8)) +#define COL_DCR_BASE (0xc00) +#define COL_DCR_SIZE (0x80) + +#define COL_IRQMASK_INJ (_TR_INJ_PIX_APAR0 | _TR_INJ_PIX_APAR1 |\ + _TR_INJ_PIX_ALIGN0 | _TR_INJ_PIX_ALIGN1 |\ + _TR_INJ_PIX_ADDR0 | _TR_INJ_PIX_ADDR1 |\ + _TR_INJ_PIX_DPAR0 | _TR_INJ_PIX_DPAR1 |\ + _TR_INJ_PIX_COLL | _TR_INJ_PIX_UE |\ + _TR_INJ_PIX_PFO0 | _TR_INJ_PIX_PFO1 |\ + _TR_INJ_PIX_HFO0 | _TR_INJ_PIX_HFO1) + +#define COL_IRQMASK_REC (_TR_REC_PRX_APAR0 | _TR_REC_PRX_APAR1 |\ + _TR_REC_PRX_ALIGN0 | _TR_REC_PRX_ALIGN1 |\ + _TR_REC_PRX_ADDR0 | _TR_REC_PRX_ADDR1 |\ + _TR_REC_PRX_COLL | _TR_REC_PRX_UE |\ + _TR_REC_PRX_PFU0 | _TR_REC_PRX_PFU1 |\ + _TR_REC_PRX_HFU0 | _TR_REC_PRX_HFU1 |\ + _TR_REC_PRX_WM0 | _TR_REC_PRX_WM1 ) + +#define 
COL_IRQ_RCV_PENDING_MASK(idx) (1U << (1 - idx)) +#define COL_IRQ_INJ_PENDING_MASK(idx) (1U << (2 - idx)) + + +#define COL_IRQ_GROUP 5 +#define COL_IRQ_BASE 20 +#define COL_IRQ_NONCRIT_NUM 20 +#define COL_NONCRIT_BASE 0 +#define COL_FIFO_SIZE 8 + + +union bgcol_header { + unsigned int raw; + struct { + unsigned int pclass : 4; + unsigned int p2p : 1; + unsigned int irq : 1; + unsigned vector : 24; + unsigned int csum_mode : 2; + } p2p; + struct { + unsigned int pclass : 4; + unsigned int p2p : 1; + unsigned int irq : 1; + unsigned int op : 3; + unsigned int opsize : 7; + unsigned int tag : 14; + unsigned int csum_mode : 2; + } bcast; +} __attribute__((packed)); + +union bgcol_status { + unsigned int raw; + struct { + unsigned int inj_pkt : 4; + unsigned int inj_qwords : 4; + unsigned int __res0 : 4; + unsigned int inj_hdr : 4; + unsigned int rcv_pkt : 4; + unsigned int rcv_qwords : 4; + unsigned int __res1 : 3; + unsigned int irq : 1; + unsigned int rcv_hdr : 4; + } x; +} __attribute__((packed)); + +static inline unsigned int bgcol_status_inj_pkt (unsigned int status) { return status >> 28 ; } +static inline unsigned int bgcol_status_inj_qwords(unsigned int status) { return (status >> 24) & 0x0f ; } +static inline unsigned int bgcol_status_inj_hdr (unsigned int status) { return (status >> 16) & 0x0f ; } +static inline unsigned int bgcol_status_rcv_pkt (unsigned int status) { return (status >> 12) & 0x0f ; } +static inline unsigned int bgcol_status_rcv_qwords(unsigned int status) { return (status >> 8 ) & 0x0f ; } +static inline unsigned int bgcol_status_irq (unsigned int status) { return (status >> 4 ) & 1 ; } +static inline unsigned int bgcol_status_rcv_hdr (unsigned int status) { return status & 0x0f ; } + + +/* some device defined */ +#define _BGP_DCR_TR_RCTRL (_BGP_DCR_TR_CH0_RCTRL - _BGP_DCR_TR_CH0) +#define _BGP_DCR_TR_SCTRL (_BGP_DCR_TR_CH0_SCTRL - _BGP_DCR_TR_CH0) +#define _BGP_DCR_TR_RSTAT (_BGP_DCR_TR_CH0_RSTAT - _BGP_DCR_TR_CH0) + +/* hardware 
specification: 4 bytes address, 256 bytes payload */ +#define COL_ALEN 4 +#define COL_PAYLOAD 256 + +#define FRAGMENT_LISTS 256 + + +struct bgpnet_dev +{ + int major,minor; /* device major, minor */ + unsigned long long physaddr; /* physical address */ + struct task_struct* current; /* process holding device */ + int signum; /* signal to send holding process */ + wait_queue_head_t read_wq; + int read_complete; + void *regs; /* mapped regs (only used with col) */ + struct semaphore sem; /* interruptible semaphore */ + struct cdev cdev; /* container device? */ +}; + + +struct bgcol_channel { + phys_addr_t paddr; + unsigned long mioaddr; + unsigned int dcrbase; + unsigned long irq_rcv_pending_mask; + unsigned long irq_inj_pending_mask; + struct timer_list inj_timer; + unsigned int injected; + unsigned int partial_injections; + unsigned int unaligned_hdr_injections; + unsigned int unaligned_data_injections; + unsigned int received; + unsigned int inject_fail; + unsigned int dropped; + unsigned int delivered; + unsigned int idx; + struct bg_col* col; + struct bgpnet_dev* chrdev; +}; + +enum { + k_ethkey_table_size=256 +}; + +struct bg_col_per_eth { + unsigned char * payload ; + unsigned int expect ; +}; + +struct bg_col { + spinlock_t lock; + spinlock_t irq_lock; + struct bgcol_channel chn[BGP_MAX_CHANNEL]; + unsigned int dcrbase; + unsigned int curr_conn; + unsigned int nodeid; + unsigned int inj_wm_mask; + unsigned int bgnet_channel ; + + unsigned int max_packets_per_frame ; + unsigned int mtu ; + + /* statistics */ + unsigned fragment_timeout; + + /* Interrupt management */ + unsigned int handler_running ; + /* Transmission items */ + struct bglink_hdr_col lnkhdr_xmit __attribute__((aligned(8))); /* Link header being used for partially-sent skb */ + spinlock_t irq_lock_xmit ; + struct sk_buff_head skb_list_xmit ; /* List of skb's to be sent */ + struct sk_buff_head skb_list_free ; /* Keep a list of skb's to free at user level */ + struct sk_buff * skb_current_xmit ; 
/* Partially-sent skb, if any */ + void * current_xmit_data ; /* Data from current skb adjusted for alignment */ + int current_xmit_len ; /* Length of current skb data */ + union bgcol_header dest_xmit ; + unsigned int fragidx_xmit ; + + /* Reception items */ + struct bglink_hdr_col lnkhdr_rcv __attribute__((aligned(8))); /* Link header pulled out of reception FIFO */ + struct sk_buff_head fragskb_list_rcv ; /* List of fully-received frames */ + struct sk_buff_head fragskb_list_discard ; /* List of frames to discard */ + struct sk_buff * skb_in_waiting ; /* An skb ready to catch the start of a 'new' frame */ + struct sk_buff * skb_mini ; /* A 'miniature' skbuff just right for catching single-packet frames */ + + /* Core-to-core items */ + struct sk_buff_head skb_list_for_filling ; + struct sk_buff_head skb_list_for_delivering ; + struct sk_buff_head skb_list_for_freeing ; + + unsigned int deliver_without_workqueue ; /* Whether to activate the 'deliver on other core' code for an skbuff */ + + + struct bgnet_dev *bgnet ; + + /* Statistics */ + + int recv_total ; + int recv_guess_miss ; + int recv_no_skbuff ; + int recv_no_first_packet ; + + /* 'big' tables */ + struct bg_col_per_eth per_eth_table[k_ethkey_table_size] ; + struct sk_buff * skb_rcv_table[k_ethkey_table_size] ; + + /* Tuning statistics */ +#if defined(KEEP_BG_COL_STATISTICS) + unsigned int send_fifo_histogram0[16] ; + unsigned int send_fifo_histogram1[16] ; + unsigned int recv_fifo_histogram0[16] ; + unsigned int recv_fifo_histogram1[16] ; +#if defined(EXTRA_TUNING) + unsigned int send_fifo_histogram2[16] ; + unsigned int recv_fifo_histogram2[16] ; +#endif +#endif + unsigned int spurious_interrupts ; + /* Diagnostic controls */ + struct ctl_table_header * sysctl_table_header ; +#if defined(HAS_MISSED_INTERRUPT_TIMER) + struct timer_list missed_interrupt_timer ; +#endif +}; + +/********************************************************************** + * driver + 
**********************************************************************/ + +#define COL_DEV_NAME "bgcol" + +extern int bgcol_debug_tracemask ; +struct bg_col; + +struct bg_col *bgcol_get_dev(void); +void bgcol_enable_interrupts(struct bg_col* col); +unsigned int bgcol_get_nodeid(struct bg_col* col); +void bgcol_link_hdr_init(struct bglink_hdr_col *lnkhdr); +int bgcol_xmit(struct bg_col *col, int chnidx, union bgcol_header dest, + struct bglink_hdr_col *lnkhdr, void *data, int len); +int __bgcol_xmit(struct bg_col *col, int chnidx, union bgcol_header dest, + struct bglink_hdr_col *lnkhdr, void *data, int len); + +void bgcol_set_mtu(struct bg_col* col, unsigned int mtu) ; +void bgcol_enable_inj_wm_interrupt(struct bgcol_channel* chn); +void bgcol_disable_inj_wm_interrupt(struct bgcol_channel* chn); +void bgcol_enable_rcv_wm_interrupt(struct bgcol_channel* chn); +void bgcol_disable_rcv_wm_interrupt(struct bgcol_channel* chn); + +void bgcol_duplex_slih(unsigned long dummy) ; + +int col_start_xmit(struct sk_buff *skb, struct net_device *dev); +int __init bgcol_module_init(void) ; +enum { + bgcol_diagnostic_use_napi = 1 +}; +/* extern int bgcol_diagnostic_use_napi ; */ + +#endif diff --git a/drivers/net/bgp_collective/bglink.h b/drivers/net/bgp_collective/bglink.h new file mode 100644 index 00000000000000..37feca2a68a2a7 --- /dev/null +++ b/drivers/net/bgp_collective/bglink.h @@ -0,0 +1,158 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Authors: Chris Ward <tjcw@uk.ibm.com> + * Volkmar Uhlig <vuhlig@us.ibm.com> + * + * Description: Link layer definitions + * + * + ********************************************************************/ +#ifndef __DRIVERS__BLUEGENE__LINK_H__ +#define __DRIVERS__BLUEGENE__LINK_H__ + +#include <linux/skbuff.h> + +#include <asm/atomic.h> + +/* link layer protocol IDs */ +#define BGLINK_P_NET 0x01 +#define BGLINK_P_CON 0x10 + +union link_proto_opt { + u16 raw; + struct { + u16 option : 4; + u16 pad_head : 4; + u16 pad_tail : 8; + } opt_net; + struct { + u16 len; + } opt_con; +} __attribute__((packed)); + +struct bglink_hdr_col { + u32 dst_key; + u32 src_key; + u16 conn_id; + u8 this_pkt; + u8 total_pkt; + u16 lnk_proto; /* net, con, ... */ + union link_proto_opt opt; +} ; /* __attribute__((packed)); */ + +struct bglink_hdr_col_map { + u32 dst_key; + u32 src_key; + u32 conn_this_total; + u32 proto_option_head_tail ; +} ; + +struct bglink_hdr_torus { + u32 dst_key; + u32 len; + u16 lnk_proto; /* net, con, ... 
*/ + union link_proto_opt opt; +} ; /* __attribute__((packed)); */ + +/* link protocol callbacks + * rcv is called when new packet arrives + * flush is called when the device was busy and becomes idle + * again (flow control) + */ +struct bgnet_dev ; +struct bg_col ; +struct bglink_proto { + u16 lnk_proto; + int receive_from_self; + int (*col_rcv)(struct bg_col*, struct sk_buff*, struct bglink_hdr_col *, struct bglink_proto *proto); + int (*col_rcv_trimmed)(struct bg_col*, struct sk_buff*, struct bglink_proto *proto, unsigned int src_key); + int (*col_flush)(int chn); + int (*torus_rcv)(struct sk_buff*, struct bglink_hdr_torus *); + void *private; + struct list_head list; +}; + +extern struct list_head linkproto_list; + +static void bglink_register_proto(struct bglink_proto *proto) __attribute__ ((unused)) ; +static void bglink_unregister_proto(struct bglink_proto *proto) __attribute__ ((unused)) ;; +static struct bglink_proto* bglink_find_proto(u16 proto)__attribute__ ((unused)) ; + +enum { + k_link_protocol_limit = 8 /* we only actually have 'eth' and 'eth_reflector' at the moment, but we might get 'con' and more */ +}; +extern struct bglink_proto * proto_array[k_link_protocol_limit] ; +static void bglink_register_proto(struct bglink_proto *proto) +{ + if( proto->lnk_proto < k_link_protocol_limit) + { + proto_array[proto->lnk_proto] = proto ; + } +} + +static void bglink_unregister_proto(struct bglink_proto *proto) +{ + if( proto->lnk_proto < k_link_protocol_limit) + { + proto_array[proto->lnk_proto] = NULL ; + } +} + +static struct bglink_proto* bglink_find_proto(u16 proto) +{ + return proto_array[proto & (k_link_protocol_limit-1)] ; +} + + +#if 0 +/* + * Here are some thoughts on how we might better consolidate link headers + * for the col and torus. The idea is that there's an 8-byte packet header + * that must be sent (at least) once per packet, and an 8-byte fragment header + * that has to be included with every fragment. 
For the col we can include + * both headers in every fragment. For the torus, there's not room to send + * the packet header in every fragment, so we'd have to send it once as part + * of the payload in the first fragment (as we're doing now anyway). + * The various structures might look something like: + */ + +struct pkt_hdr { + u32 lnk_proto : 8; + u32 dst_key : 24; + u16 len; + u16 private; +} __attribute__((packed)); + +struct frag_hdr { + u32 offset; + u32 conn_id : 8; + u32 src_key : 24; +} __attribute__((packed)); + +struct frag_hdr_col { + struct pkt_hdr pkt; + struct frag_hdr frag; +} __attribute__((packed)); + +struct frag_hdr_torus { + union torus_fifo_hw_header fifo; + struct frag_hdr frag; +} __attribute__((packed)); +#endif + +#endif /* !__DRIVERS__BLUEGENE__LINK_H__ */ diff --git a/drivers/net/bgp_collective/bgnet.c b/drivers/net/bgp_collective/bgnet.c new file mode 100644 index 00000000000000..2dc45ac5cef0d7 --- /dev/null +++ b/drivers/net/bgp_collective/bgnet.c @@ -0,0 +1,827 @@ +/********************************************************************* + * + * Description: Blue Gene driver exposing col and torus as a NIC + * + * Copyright (c) 2007, 2010 International Business Machines + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + * Authors: + * Chris Ward <tjcw@uk.ibm.com> + * Volkmar Uhlig <vuhlig@us.ibm.com> + * Andrew Tauferner <ataufer@us.ibm.com> + * + ********************************************************************/ + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/etherdevice.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <linux/ip.h> +#include <linux/workqueue.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/bgp_personality.h> +#include <asm/delay.h> + +#include <asm/bluegene.h> + +#include "bglink.h" +#include "bgnet.h" +#include "bgcol.h" +/* #include "bgtor.h" */ + + +/********************************************************************** + * defines + **********************************************************************/ + +#define DRV_NAME "bgnet" +#define DRV_VERSION "0.5" +#define DRV_DESC "Blue Gene NIC (IBM)" + +MODULE_DESCRIPTION(DRV_DESC); +MODULE_AUTHOR("IBM"); + +/* #define TRUST_TREE_CRC */ + +#include <linux/KernelFxLog.h> + +#include "../bgp_network/bgp_net_traceflags.h" + + +#define XTRACEN(i,x...) +#if defined(REQUIRE_TRACE) +#define TRACE(x...) { printk(KERN_EMERG x) ; } +#define TRACE1(x...) { printk(KERN_EMERG x) ; } +#define TRACE2(x...) { printk(KERN_EMERG x) ; } +#define TRACEN(i,x...) { printk(KERN_EMERG x) ; } +#define TRACED(x...) { printk(KERN_EMERG x) ; } +#elif defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) +#define TRACE(x...) KernelFxLog(bgcol_debug_tracemask & k_t_general,x) +#define TRACE1(x...) KernelFxLog(bgcol_debug_tracemask & k_t_lowvol,x) +#define TRACE2(x...) 
KernelFxLog(bgcol_debug_tracemask & k_t_detail,x) +#define TRACEN(i,x...) KernelFxLog(bgcol_debug_tracemask & (i),x) +#define TRACED(x...) KernelFxLog(1,x) +#define TRACES(x...) KernelFxLog(1,x) +#else +#define TRACE(x...) +#define TRACE1(x...) +#define TRACE2(x...) +#define TRACEN(i,x...) +#define TRACED(x...) +#define TRACES(x...) +#endif + +/* An IPv4 address for slotting into a trace message */ +#define NIPQ(X) ((X)>>24)&0xff,((X)>>16)&0xff,((X)>>8)&0xff,(X)&0xff + +#define BGNET_FRAG_MTU 240 +#define BGNET_MAX_MTU (BGNET_FRAG_MTU * 254) +#define BGNET_DEFAULT_MTU ETH_DATA_LEN + + +static BGP_Personality_t bgnet_personality; +/* static struct net_device *static_dev ; */ + +/* static struct bglink_proto bgnet_lnk; */ + +static DEFINE_SPINLOCK(bgnet_lock); +static LIST_HEAD(bgnet_list); + +struct skb_cb_lnk { + struct bglink_hdr_col lnkhdr; + union bgcol_header dest; +}; + +int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev, unsigned int x, unsigned int y, unsigned int z) ; + +/********************************************************************** + * Linux module + **********************************************************************/ + +MODULE_DESCRIPTION("BlueGene Ethernet driver"); +MODULE_LICENSE("GPL"); + +int bgnic_driverparm = 0 ; + +static void dumpmem(const void *address, unsigned int length, const char * label) + { + int x ; + TRACEN(k_t_fifocontents,"Memory dump, length=%d: %s",length,label) ; + if( length > 256 ) { + length = 256 ; + } + for (x=0;x<length;x+=32) + { + int *v = (int *)(address+x) ; + TRACEN(k_t_fifocontents,"%p: %08x %08x %08x %08x %08x %08x %08x %08x", + v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7] + ) ; + } + } + + +/********************************************************************** + * Linux' packet and skb management + **********************************************************************/ + + +static int bgnet_open(struct net_device* dev) +{ +/* struct bgnet_dev* bgnet = (struct bgnet_dev*) netdev_priv(dev); */ 
+ +/* bgcol_enable_rcv_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */ + + TRACEN(k_t_napi,"netif_start_queue(dev=%p)",dev) ; + netif_start_queue(dev); + + return 0; +} + +static int bgnet_stop(struct net_device* dev) +{ +/* struct bgnet_dev* bgnet = (struct bgnet_dev*) netdev_priv(dev); */ + + TRACEN(k_t_napi,"netif_stop_queue(dev=%p)",dev) ; + netif_stop_queue(dev); +/* bgcol_disable_rcv_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */ +/* bgcol_disable_inj_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */ + + return 0; +} + + +static int bgnet_change_mtu(struct net_device *dev, int new_mtu) +{ + struct bgnet_dev *bgnet = netdev_priv(dev); + if (new_mtu < 60 || new_mtu > BGNET_MAX_MTU ) + return -EINVAL; + dev->mtu = new_mtu; + bgcol_set_mtu(bgnet->bgcol, new_mtu+sizeof(struct ethhdr)) ; + return 0; +} + + +static inline void stamp_checksum_place_in_skb(struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + unsigned int eth_proto = eth->h_proto ; + struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ; + /* unsigned int iphlen = 4*iph->ihl ; */ + /* struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) ); */ + /* struct udphdr *udph = (struct udphdr *) ( ((char *)(iph)) + (iphlen) ); */ + unsigned int ip_proto = iph->protocol ; + skb->csum_start = skb_transport_header(skb) - skb->head; + + if( eth_proto == ETH_P_IP) { + if( ip_proto == IPPROTO_TCP) skb->csum_offset = offsetof(struct tcphdr, check); + else if( ip_proto == IPPROTO_UDP) skb->csum_offset = offsetof(struct udphdr, check); + } + +} + +/* + * The hardware data rate on 'collective' is 6 bits/cycle, i.e. 5100Mb/s . + * We carry 240 bytes of payload in each 256 byte packet, and there are some bytes of 'overhead' as well + * (CRC, opcode, and a few others); giving a 'peak performance' TCP/IP data rate of a little under 4781 Mb/s . + * The 'collective' hardware should be able to do this in both directions simultaneously. 
+ * + * Driving data into the compute fabric from the 10gE link can achieve more or less this, by using one core as + * interrupt handler for the 10gE and another core as interrupt handler for the collective, if you run (say) + * 16 TCP/IP sessions through the 10gE and the IO node, one to each compute node in the PSET. + * + * Driving data out of the compute fabric and into the 10gE in the normal way for linux device drivers causes + * the core handling the collective interrupt to go 100% busy; there are not enough cycles to drain the collective + * FIFO and also go through the linux networking stack. I have seen about 4Gb/s this way. + * To get the last 15% or so, it seems necessary to have more than one core helping with this work. + * + * I'm trying to do this by having one core handle the 'collective' interrupt and drain the FIFO, and then + * hand the sk_buff off to another core via a 'work queue', so that this second core can drive the linux + * network stack. + * + * I haven't measured the simultaneous-bidirectional data rate capability. + * + */ +static int bgnet_receive(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_hdr_col *lnkhdr, struct bglink_proto* proto) +{ + TRACE("(>) skb=%p lnkhdr=%p proto=%p", skb,lnkhdr,proto) ; + if( skb != NULL && lnkhdr != NULL && proto != NULL && -1 != (int) proto ) + { + struct net_device *dev = (struct net_device*)proto->private; + struct bgnet_dev *bgnet = netdev_priv(dev); +/* struct net_device *dev = (struct net_device*)((void *)bgnet - */ +/* netdev_priv(NULL)); */ + + TRACE("bgnet rcvd pkt: data=%p, len=%d, head=%d, tail=%d, res len=%d [%s:%d]", + skb->data, skb->len, lnkhdr->opt.opt_net.pad_head, + lnkhdr->opt.opt_net.pad_tail, skb->len - lnkhdr->opt.opt_net.pad_head - lnkhdr->opt.opt_net.pad_tail, __func__, __LINE__); + +/* if (skb->len % BGNET_FRAG_MTU != 0) */ +/* printk("bgnet: received packet size not multiple of %d\n", BGNET_FRAG_MTU); */ + + /* skb_pull and trim check for over/underruns. 
For 0 size the + * add/subtract is the same as a test */ + __skb_pull(skb, lnkhdr->opt.opt_net.pad_head); + __skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail); + + if (lnkhdr->src_key == bgnet->bgcol_vector) { + /* drop ether packets that are from ourselves */ + /* bg tree device sends packets to itself when broadcasting */ + kfree_skb(skb); + return 0; + } + + /* dump_skb(skb); */ + + dumpmem(skb->data,skb->len,"Frame delivered via collective") ; + + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + + if ( k_trust_collective_crc) skb->ip_summed = CHECKSUM_PARTIAL ; + stamp_checksum_place_in_skb(skb) ; + +/* #if defined(TRUST_TREE_CRC) */ +/* skb->ip_summed = CHECKSUM_PARTIAL ; // Frame was checked by CRC, but we would need a checksum if it is being forwarded off the BGP fabric */ +/* // // Packets from tree-local addresses have been verified by tree hardware */ +/* // { */ +/* // struct ethhdr *eth = (struct ethhdr *)skb->data; */ +/* // if (bgnet->eth_mask == 0 || */ +/* // ((bgnet->eth_mask & *(unsigned int *)(ð->h_source[0])) == */ +/* // (bgnet->eth_local))) */ +/* // { */ +/* // skb->ip_summed = CHECKSUM_UNNECESSARY ; */ +/* // } */ +/* // else */ +/* // { */ +/* // skb->ip_summed = CHECKSUM_NONE ; */ +/* // } */ +/* // } */ +/* #endif */ + + TRACE("Delivering skb->dev=%p skb->protocol=%d skb->pkt_type=%d skb->ip_summed=%d ", + skb->dev, skb->protocol, skb->pkt_type, skb->ip_summed ) ; + dumpmem(skb->data,skb->len,"Frame after stripping header") ; + dev->last_rx = jiffies; + bgnet->stats.rx_packets++; + bgnet->stats.rx_bytes += skb->len; + + TRACE("bgnet_receive before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.tx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.tx_bytes=%lu bgnet->stats.rx_frame_errors=%lu", + bgnet->stats.rx_packets, bgnet->stats.tx_packets, bgnet->stats.rx_bytes, bgnet->stats.tx_bytes, bgnet->stats.rx_frame_errors) ; +/* TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; // Only tracing the torus ... 
*/ +/* if( k_deliver_via_workqueue && bgnet->bgcol->deliver_via_workqueue ) */ +/* { */ +/* bgnet_deliver_via_workqueue(skb) ; */ +/* } */ +/* else */ +/* { */ +#if defined(CONFIG_BGP_COLLECTIVE_NAPI) + if( bgcol_diagnostic_use_napi) + { + { + TRACEN(k_t_napi|k_t_request,"netif_receive_skb(%p)",skb) ; + netif_receive_skb(skb) ; + } + } + else + { + netif_rx(skb); + } +#else + netif_rx(skb); +#endif +/* } */ + TRACE("bgnet_receive after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu", + bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ; + + } + TRACE("(<)") ; + + return 0; +} + +static int bgnet_receive_trimmed(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_proto* proto, unsigned int src_key ) +{ + TRACE("(>) skb=%p proto=%p", skb,proto) ; + if( skb != NULL && proto != NULL && -1) + { + struct net_device *dev = (struct net_device*)proto->private; + struct bgnet_dev *bgnet = netdev_priv(dev); +/* struct net_device *dev = (struct net_device*)((void *)bgnet - */ +/* netdev_priv(NULL)); */ + + TRACE("bgnet rcvd pkt: data=%p, len=%d", + skb->data, skb->len); + if( src_key != bgnet->bgcol_vector) + { + dumpmem(skb->data,skb->len,"Frame delivered via collective") ; + + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + + if ( k_trust_collective_crc) skb->ip_summed = CHECKSUM_PARTIAL ; + stamp_checksum_place_in_skb(skb) ; + + + TRACE("Delivering skb->dev=%p skb->protocol=%d skb->pkt_type=%d skb->ip_summed=%d ", + skb->dev, skb->protocol, skb->pkt_type, skb->ip_summed ) ; + dumpmem(skb->data,skb->len,"Frame after stripping header") ; + dev->last_rx = jiffies; + bgnet->stats.rx_packets++; + bgnet->stats.rx_bytes += skb->len; + + TRACE("bgnet_receive before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.tx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.tx_bytes=%lu bgnet->stats.rx_frame_errors=%lu", + bgnet->stats.rx_packets, bgnet->stats.tx_packets, bgnet->stats.rx_bytes, 
bgnet->stats.tx_bytes, bgnet->stats.rx_frame_errors) ; + /* TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; // Only tracing the torus ... */ + #if defined(CONFIG_BGP_COLLECTIVE_NAPI) + if( bgcol_diagnostic_use_napi) + { + { + TRACEN(k_t_napi|k_t_request,"netif_receive_skb(%p)",skb) ; + netif_receive_skb(skb) ; + } + } + else + { + netif_rx(skb); + } + #else + netif_rx(skb); + #endif + TRACE("bgnet_receive after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu", + bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ; + } + else + { + /* a discardable self-send */ + dev_kfree_skb(skb) ; + } + + } + TRACE("(<)") ; + + return 0; +} + + +/* A packet gets to the IO node, and needs 'reflecting' to the compute node(s) that want it. */ +static int col_reflect(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_hdr_col *lnkhdr, + struct bglink_proto* proto) +{ + TRACE("(>) col_reflect skb=%p lnkhdr=%p proto=%p", skb,lnkhdr,proto) ; + if( skb != NULL && lnkhdr != NULL && proto != NULL && -1 != (int) proto ) + { + struct net_device *dev = (struct net_device*)proto->private; + struct bgnet_dev *bgnet = netdev_priv(dev); + + + TRACE("bgnet rcvd pkt for reflection: data=%p, len=%d, head=%d, tail=%d, res len=%d [%s:%d]", + skb->data, skb->len, lnkhdr->opt.opt_net.pad_head, + lnkhdr->opt.opt_net.pad_tail, skb->len - lnkhdr->opt.opt_net.pad_head - lnkhdr->opt.opt_net.pad_tail, __func__, __LINE__); + +/* if (skb->len % BGNET_FRAG_MTU != 0) */ +/* printk("bgnet: received packet size not multiple of %d\n", BGNET_FRAG_MTU); */ + + /* skb_pull and trim check for over/underruns. 
For 0 size the + * add/subtract is the same as a test */ + __skb_pull(skb, lnkhdr->opt.opt_net.pad_head); + __skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail); + /* A 'broadcast' packet needs delivering locally as well as reflecting */ + { + struct ethhdr *eth = (struct ethhdr *)skb->data; + if (is_broadcast_ether_addr(eth->h_dest)) { + struct sk_buff *localskb = skb_clone(skb, GFP_KERNEL); + if( localskb ) + { + dumpmem(localskb->data,localskb->len,"Frame delivered via tree (broadcast reflection)") ; + localskb->dev = dev; + localskb->protocol = eth_type_trans(localskb, dev); + + localskb->ip_summed = CHECKSUM_UNNECESSARY ; /* Packet was from tree, h/w verified it */ + + TRACE("Delivering localskb->dev=%p localskb->protocol=%d localskb->pkt_type=%d localskb->ip_summed=%d ", + localskb->dev, localskb->protocol, localskb->pkt_type, localskb->ip_summed ) ; + dumpmem(localskb->data,localskb->len,"Frame after stripping header") ; + dev->last_rx = jiffies; + bgnet->stats.rx_packets++; + bgnet->stats.rx_bytes += localskb->len; + TRACE("col_reflect before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu", + bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ; +/* TRACEN(k_t_napi,"netif_rx(skb=%p)",localskb) ; // Only tracing the torus ... 
*/ +#if defined(CONFIG_BGP_COLLECTIVE_NAPI) + if( bgcol_diagnostic_use_napi) + { + TRACEN(k_t_napi,"netif_receive_skb(%p)",localskb) ; + netif_receive_skb(localskb) ; + } + else + { + netif_rx(localskb); + } +#else + netif_rx(localskb) ; +#endif + TRACE("col_reflect after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu", + bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ; + } + } + } + + + /* dump_skb(skb); */ + col_start_xmit(skb, dev) ; + } + + TRACE("(<) col_reflect") ; + + return 0; +} + +/* A packet gets to the IO node, and needs 'reflecting' to the compute node(s) that want it. */ +static int col_reflect_trimmed(struct bg_col *bgcol, struct sk_buff *skb, + struct bglink_proto* proto, unsigned int src_key ) +{ + TRACE("(>) col_reflect skb=%p proto=%p", skb,proto) ; + if( skb != NULL && proto != NULL && -1 != (int) proto ) + { + struct net_device *dev = (struct net_device*)proto->private; + struct bgnet_dev *bgnet = netdev_priv(dev); + + + TRACE("bgnet rcvd pkt for reflection: data=%p, len=%d", + skb->data, skb->len); + + + /* A 'broadcast' packet needs delivering locally as well as reflecting */ + { + struct ethhdr *eth = (struct ethhdr *)skb->data; + if (is_broadcast_ether_addr(eth->h_dest)) { + struct sk_buff *localskb = skb_clone(skb, GFP_KERNEL); + if( localskb ) + { + dumpmem(localskb->data,localskb->len,"Frame delivered via tree (broadcast reflection)") ; + localskb->dev = dev; + localskb->protocol = eth_type_trans(localskb, dev); + + localskb->ip_summed = CHECKSUM_UNNECESSARY ; /* Packet was from tree, h/w verified it */ + + TRACE("Delivering localskb->dev=%p localskb->protocol=%d localskb->pkt_type=%d localskb->ip_summed=%d ", + localskb->dev, localskb->protocol, localskb->pkt_type, localskb->ip_summed ) ; + dumpmem(localskb->data,localskb->len,"Frame after stripping header") ; + dev->last_rx = jiffies; + bgnet->stats.rx_packets++; + bgnet->stats.rx_bytes += localskb->len; + 
TRACE("col_reflect before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu", + bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ; +/* TRACEN(k_t_napi,"netif_rx(skb=%p)",localskb) ; // Only tracing the torus ... */ +#if defined(CONFIG_BGP_COLLECTIVE_NAPI) + if( bgcol_diagnostic_use_napi) + { + TRACEN(k_t_napi,"netif_receive_skb(%p)",localskb) ; + netif_receive_skb(localskb) ; + } + else + { + netif_rx(localskb); + } +#else + netif_rx(localskb) ; +#endif + TRACE("col_reflect after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu", + bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ; + } + } + } + + + /* dump_skb(skb); */ + col_start_xmit(skb, dev) ; + } + + TRACE("(<) col_reflect") ; + + return 0; +} + + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void bgnet_poll(struct net_device *dev) +{ + /* no-op; packets are fed by the col device */ +} +#endif + +static inline int is_torus_ether_addr(const u8 *addr) +{ + return ((addr[0] & 0x7) == 0x6); +} + + +unsigned int find_xyz_address(unsigned int ip) ; + + +static int bgnet_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + col_start_xmit(skb, dev) ; + return 0 ; +} + +static void bgnet_uninit(struct net_device *dev) +{ + struct bgnet_dev *bgnet = netdev_priv(dev); + + bglink_unregister_proto(&bgnet->lnk); + bglink_unregister_proto(&bgnet->lnkreflect); + +} + +static struct net_device_stats *bgnet_get_stats(struct net_device *dev) +{ + struct bgnet_dev* bgnet = netdev_priv(dev); + + return &bgnet->stats; +} + + +static int bgnet_set_mac_addr(struct net_device* netDev, + void* p) +{ + struct sockaddr* addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(netDev->dev_addr, addr->sa_data, netDev->addr_len); + + return 0; +} + + +static int bgnet_set_config(struct net_device* netDev, + struct ifmap* map) +{ + int rc = 0; + struct 
bgnet_dev* bgnet = netdev_priv(netDev); + + /* Set this with ifconfig <interface> port <collective virtual channel> */ + if (map->port) + bgnet->bgcol_channel = map->port; + + /* Set this with ifconifg <interface> io_addr <collective route> */ + if (map->base_addr) + bgnet->bgcol_route = map->base_addr; + + return rc; +} + + +static int bgnet_init(struct net_device *dev) +{ + struct bgnet_dev *bgnet = netdev_priv(dev); + TRACE("(>) bgnet_init") ; + bgnet->bgcol_route = 0 /*15*/; +#define ETH_COL_CHANNEL 0 + bgnet->bgcol_channel = 0 ; +/* bgnet->bgcol_channel = (bgnet_personality.Block_Config & BGP_PERS_BLKCFG_IPOverCollectiveVC) ? 1 : 0; */ +/* bgnet->eth_bridge_vector = -1; */ +/* bgnet->link_protocol = BGLINK_P_NET; */ +/* bgnet->net_device = dev; */ + + bgnet->bgcol = bgcol_get_dev(); + TRACE("(=) bgnet->bgcol=%p",bgnet->bgcol) ; + + if (!bgnet->bgcol) + return -1; + + bgnet->bgcol->bgnet_channel = bgnet->bgcol_channel ; +/* bgnet->phandle_tree = 3; */ +/* bgnet->phandle_torus = 0; */ +/* // bgnet->tree_route = 15; // 15 is 'partition flood' */ +/* bgnet->tree_route = 0 ; // 0 is 'compute to IO' or 'IO to compute' */ +/* bgnet->tree_channel = BGNET_TREE_CHANNEL ; */ +/* bgnet->eth_mask = 0; */ +/* // bgnet->eth_bridge_vector = 0; // route through the I/O node? (personality.Network_Config.IONodeRank) */ +/* bgnet->eth_bridge_vector = personality.Network_Config.IOnodeRank; // route through the I/O node? (personality.Network_Config.IONodeRank) */ + bgnet->eth_bridge_vector = bgnet_personality.Network_Config.IOnodeRank; /* route through the I/O node? 
(personality.Network_Config.IONodeRank) */ + bgnet->bgcol_protocol = 1; + bgnet->bgcol_reflector_protocol = 2 ; /* CN requests reflection from ION */ + +/* bgnet->i_am_ionode = ( personality.Network_Config.IOnodeRank == personality.Network_Config.Rank) ; */ +#if 0 + p = get_property(np, "local-mac-address", NULL); + if (p == NULL) { + printk(KERN_ERR "%s: Can't find local-mac-address property\n", + np->full_name); + goto err; + } + memcpy(dev->dev_addr, p, 6); +#endif + dev->dev_addr[0] = 0x00; + dev->dev_addr[1] = 0x80; + *((unsigned*)(&dev->dev_addr[2])) = 0x46000000u | bgnet_personality.Network_Config.Rank; /* why 0x46yyyyyy ??? */ + + bgnet->bgcol_vector = *(unsigned int *)(&dev->dev_addr[2]); + bgnet->eth_local = bgnet->eth_mask & *(unsigned int *)&dev->dev_addr[0]; + + spin_lock(&bgnet_lock); + if (list_empty(&bgnet_list)) { + /* register with col */ +/* bgnet_lnk.lnk_proto = bgnet->link_protocol; */ +/* bgnet_lnk.receive_from_self = 0; */ +/* bgnet_lnk.col_rcv = col_receive; */ +/* bgnet_lnk.col_flush = col_flush; */ +/* bgnet_lnk.torus_rcv = torus_receive; */ +/* bglink_register_proto(&bgnet_lnk); */ + bgnet->lnk.lnk_proto = bgnet->bgcol_protocol; + bgnet->lnk.col_rcv = bgnet_receive; + bgnet->lnk.col_rcv_trimmed = bgnet_receive_trimmed; + bgnet->lnk.private = dev; + bglink_register_proto(&bgnet->lnk); + + bgnet->lnkreflect.lnk_proto = bgnet->bgcol_reflector_protocol; + bgnet->lnkreflect.col_rcv = col_reflect; + bgnet->lnkreflect.col_rcv_trimmed = col_reflect_trimmed; + bgnet->lnkreflect.private = dev; + bglink_register_proto(&bgnet->lnkreflect); + + /* Hook for the tree interrupt handler to find the 'bgnet' */ + bgnet->bgcol->bgnet = bgnet ; + } +/* list_add_rcu(&bgnet->list, &bgnet_list); */ +/* */ +/* spin_unlock(&bgnet_lock); */ +/* */ +/* skb_queue_head_init(&bgnet->pending_skb_list); */ + bgcol_enable_interrupts(bgnet->bgcol) ; /* Should be able to run tree interrupts now */ + + + TRACE("(<) bgnet_init") ; + return 0; +} + +#if 
defined(CONFIG_BGP_COLLECTIVE_NAPI) +static int bgnet_poll_napi(struct napi_struct * napi, int budget) +{ + TRACEN(k_t_napi,"(>) napi=%p budget%d",napi,budget) ; + bgcol_duplex_slih(0) ; + TRACEN(k_t_napi,"(<)") ; + return 0 ; +} +#endif + +#if defined(HAVE_NET_DEVICE_OPS) +static const struct net_device_ops netdev_ops = { + .ndo_change_mtu = bgnet_change_mtu , + .ndo_get_stats = bgnet_get_stats , + .ndo_start_xmit = bgnet_start_xmit , + .ndo_init = bgnet_init , + .ndo_uninit = bgnet_uninit , + .ndo_open = bgnet_open , + .ndo_stop = bgnet_stop , + .ndo_set_config = bgnet_set_config , + .ndo_set_mac_address = bgnet_set_mac_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = bgnet_poll, +#endif +}; +#endif +static int __init +bgnet_module_init(void) +{ + struct bgnet_dev *bgnet; + struct net_device *dev; + + TRACEN(k_t_general, "(>) bgnet_module_init") ; + dev = alloc_etherdev(sizeof(struct bgnet_dev)); + TRACEN(k_t_general, "(=) bgnet_module_init dev=%p", dev) ; + if (!dev) + return -ENOMEM; + +/* SET_MODULE_OWNER(dev); // Anachronism */ + + /* Read personality. */ + bluegene_getPersonality((void*) &bgnet_personality, sizeof(bgnet_personality)); + bgnet = (struct bgnet_dev*) netdev_priv(dev); + memset(bgnet, 0, sizeof(*bgnet)); + bgcol_module_init() ; +/* bgnet_init(dev); */ + +/* // Set the MAC address for this interface. 
*/ +/* if (bluegene_isIONode()) { */ +/* unsigned char ipOctet2 = (bgnet_personality.Ethernet_Config.IPAddress.octet[13] + 1) & 0xfc; */ +/* */ +/* dev->dev_addr[0] = ipOctet2 | 2; */ +/* dev->dev_addr[1] = bgnet_personality.Ethernet_Config.IPAddress.octet[14]; */ +/* dev->dev_addr[2] = bgnet_personality.Ethernet_Config.IPAddress.octet[15]; */ +/* dev->dev_addr[3] = ((bgnet_personality.Network_Config.Rank >> 16) & 0x3f) | (ipOctet2 << 6); */ +/* dev->dev_addr[4] = (unsigned char) ((bgnet_personality.Network_Config.Rank >> 8)); */ +/* dev->dev_addr[5] = (unsigned char) bgnet_personality.Network_Config.Rank; */ +/* } else */ +/* memcpy(dev->dev_addr, bgnet_personality.Ethernet_Config.EmacID, sizeof(dev->dev_addr)); */ + +#if defined(HAVE_NET_DEVICE_OPS) + dev->netdev_ops = &netdev_ops ; +#else + dev->init = bgnet_init; + dev->uninit = bgnet_uninit; + dev->get_stats = bgnet_get_stats; + dev->hard_start_xmit = bgnet_start_xmit; + dev->change_mtu = bgnet_change_mtu; + dev->open = bgnet_open; + dev->stop = bgnet_stop; + dev->set_config = bgnet_set_config; + dev->set_mac_address = bgnet_set_mac_addr; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = bgnet_poll; +#endif +#endif + dev->mtu = BGNET_DEFAULT_MTU; + +/* Tried turning checksum generation off, but this resulted in packets routed off the BGP not having checksums */ +/* and lack of interoperability with front-end nodes */ +/* (try CHECKSUM_PARTIAL above to see if the TOMAL will generate an IP checksum in this circumstance) */ + dev->features = k_trust_collective_crc + ? 
(NETIF_F_HIGHDMA | NETIF_F_NO_CSUM) + : NETIF_F_HIGHDMA ; +/* if( k_trust_collective_crc) */ +/* { */ +/* dev->features = NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM ; */ +/* } */ +/* else */ +/* { */ +/* dev->features = NETIF_F_HIGHDMA ; */ +/* } */ + +/* #if defined(TRUST_TREE_CRC) */ +/* dev->features = NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA ; */ +/* #endif */ +/* dev->features |= NETIF_F_NO_CSUM; */ + + TRACEN(k_t_general,"(=) dev->name=%s", + dev->name + ) ; + { + int rc = register_netdev(dev) ; + TRACEN(k_t_general, "(=) bgnet_module_init register_netdev rc=%d", rc) ; + if( rc != 0 ) + goto err; + } + +#if defined(CONFIG_BGP_COLLECTIVE_NAPI) + netif_napi_add(dev,&bgnet->napi, bgnet_poll_napi, k_collective_budget) ; + napi_enable(&bgnet->napi) ; +#endif + /* increase header size to fit torus hardware header */ +/* if (bgnet->torus) */ +/* dev->hard_header_len += 16; */ + + if (bgnet->eth_bridge_vector != -1) + printk(KERN_INFO " bridge 0x%06x\n", bgnet->eth_bridge_vector); + + TRACEN(k_t_general, "(<) bgnet_module_init rc=0") ; + return 0; + + err: + free_netdev(dev); + TRACEN(k_t_general, "(<) bgnet_module_init err rc=-1") ; + return -1; +} + + +/* static void __exit */ +/* bgnet_module_exit (void) */ +/* { */ +/* return; */ +/* } */ + +module_init(bgnet_module_init); +/* module_exit(bgnet_module_exit); */ diff --git a/drivers/net/bgp_collective/bgnet.h b/drivers/net/bgp_collective/bgnet.h new file mode 100644 index 00000000000000..48748d0a2425ae --- /dev/null +++ b/drivers/net/bgp_collective/bgnet.h @@ -0,0 +1,152 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Authors: Volkmar Uhlig <vuhlig@us.ibm.com> + * Chris Ward <tjcw@uk.ibm.com> + * + * Description: definitions for BG networks + * + * + ********************************************************************/ + +#ifndef __DRIVERS__NET__BLUEGENE__BGNET_H__ +#define __DRIVERS__NET__BLUEGENE__BGNET_H__ + +/* static inline unsigned int BG_IRQ(unsigned int group, unsigned int irq) */ +/* { */ +/* return ((group) << 5 | (irq)) ; */ +/* } */ +/* #define BG_IRQ(group, irq) ((group) << 5 | (irq)) */ + + +/********************************************************************** + * link layer + **********************************************************************/ + +/* enum { */ +/* BGNET_P_ETH0 = 1 , */ +/* BGNET_P_ETH1 = 2 , */ +/* BGNET_P_ETH2 = 3 , */ +/* BGNET_P_ETH3 = 4 , */ +/* BGNET_P_ETH4 = 5 , */ +/* BGNET_P_ETH5 = 6 , */ +/* BGNET_P_ETH6 = 7 , */ +/* BGNET_P_ETH7 = 8 , */ +/* BGNET_P_ETH8 = 9 , */ +/* BGNET_P_LAST_ETH = BGNET_P_ETH8 , */ +/* BGNET_P_CONSOLE = 20 */ +/* }; */ +/* //#define BGNET_P_ETH0 1 */ +/* //#define BGNET_P_ETH1 2 */ +/* //#define BGNET_P_ETH2 3 */ +/* //#define BGNET_P_ETH3 4 */ +/* //#define BGNET_P_ETH4 5 */ +/* //#define BGNET_P_ETH5 6 */ +/* //#define BGNET_P_ETH6 7 */ +/* //#define BGNET_P_ETH7 8 */ +/* //#define BGNET_P_ETH8 9 */ +/* //#define BGNET_P_LAST_ETH BGNET_P_ETH8 */ 
+/* // */ +/* //#define BGNET_P_CONSOLE 20 */ + +/* Facility for using multiple cores in support of 'collective', only make it happen if multiple cores are available ... */ +#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC) +#define COLLECTIVE_TREE_AFFINITY +#endif + +#if defined(COLLECTIVE_TREE_AFFINITY) +/* On IO nodes, 10gE will be using core 0. On Compute nodes, torus will be using core 2. So exploit cores 1 and 3 for collective ... */ +enum { + k_TreeAffinityCPU = 1 , + k_WorkqueueDeliveryCPU = 3 +}; +#else +enum { + k_TreeAffinityCPU = 0 , + k_WorkqueueDeliveryCPU = 0 +}; +#endif + + +enum { + BGNET_FRAG_MTU = 240 , +/* BGNET_MAX_MTU = BGNET_FRAG_MTU * 128 , */ + BGNET_DEFAULT_MTU = ETH_DATA_LEN +}; +/* #define BGNET_FRAG_MTU 240 */ +/* #define BGNET_MAX_MTU (BGNET_FRAG_MTU * 128) */ +/* //#define BGNET_DEFAULT_MTU (BGNET_FRAG_MTU * 30 - 12) */ +/* #define BGNET_DEFAULT_MTU ETH_DATA_LEN */ + +/* // Which bgcol channel to use for the driver */ +/* #define BGNET_TREE_CHANNEL 0 */ + +enum { + k_trust_collective_crc = +#if defined(BGP_COLLECTIVE_IP_CHECKSUM) + 0 +#else + 1 +#endif + /* Whether the IP layer should trust the BGP hardware CRC on the collective network */ +}; + +enum { + k_collective_budget = 1000 /* Number of frames we are willing to collect from the tree before we 'yield' */ +}; + +enum { + k_deliver_via_workqueue = 1 /* Whether to deliver via a work queue (on another core) */ +}; +struct bgnet_dev +{ + struct bg_col *bgcol; + unsigned int bgcol_route; + unsigned int bgcol_channel; + unsigned short bgcol_protocol; + unsigned short bgcol_reflector_protocol ; + unsigned int bgcol_vector; + unsigned int eth_mask; + unsigned int eth_local; + unsigned int eth_bridge_vector; + struct bglink_proto lnk; + struct bglink_proto lnkreflect; + struct net_device_stats stats; + u32 phandle_bgcol; + u32 phandle_torus; + struct sk_buff_head xmit_list ; /* List of skb's to be sent */ +#if 
defined(CONFIG_BGP_COLLECTIVE_NAPI) + struct napi_struct napi ; +#endif +/* unsigned int i_am_ionode ; */ +}; + +extern inline unsigned int eth_to_key(char *addr) +{ + unsigned int key; + if (is_broadcast_ether_addr(addr)) + key = ~0U; + else + key = (addr[3] << 16) | (addr[4] << 8) | (addr[5] << 0); + return key; +} + + +/* extern struct list_head bglink_proto; */ +/* extern struct bglink_proto bgnet_eth; */ + +#endif /* !__DRIVERS__NET__BLUEGENE__BGNIC_H__ */ diff --git a/drivers/net/bgp_collective/bgp_dcr.h b/drivers/net/bgp_collective/bgp_dcr.h new file mode 100644 index 00000000000000..f1f60c24436dc1 --- /dev/null +++ b/drivers/net/bgp_collective/bgp_dcr.h @@ -0,0 +1,1041 @@ +/********************************************************************* + * + * Description: BGP DCR map (copied from bpcore) + * + * Copyright (c) 2007, 2008 International Business Machines + * Volkmar Uhlig <vuhlig@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + ********************************************************************/ + +#ifndef _BGP_DCR_H_ +#define _BGP_DCR_H_ + +#define _BN(b) ((1<<(31-(b)))) +#define _B1(b,x) (((x)&0x1)<<(31-(b))) +#define _B2(b,x) (((x)&0x3)<<(31-(b))) +#define _B3(b,x) (((x)&0x7)<<(31-(b))) +#define _B4(b,x) (((x)&0xF)<<(31-(b))) +#define _B5(b,x) (((x)&0x1F)<<(31-(b))) +#define _B6(b,x) (((x)&0x3F)<<(31-(b))) +#define _B7(b,x) (((x)&0x7F)<<(31-(b))) +#define _B8(b,x) (((x)&0xFF)<<(31-(b))) +#define _B9(b,x) (((x)&0x1FF)<<(31-(b))) +#define _B10(b,x) (((x)&0x3FF)<<(31-(b))) +#define _B11(b,x) (((x)&0x7FF)<<(31-(b))) +#define _B12(b,x) (((x)&0xFFF)<<(31-(b))) +#define _B13(b,x) (((x)&0x1FFF)<<(31-(b))) +#define _B14(b,x) (((x)&0x3FFF)<<(31-(b))) +#define _B15(b,x) (((x)&0x7FFF)<<(31-(b))) +#define _B16(b,x) (((x)&0xFFFF)<<(31-(b))) +#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b))) +#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b))) +#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b))) +#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b))) +#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b))) +#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b))) +#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b))) +#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b))) +#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b))) +#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b))) +#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b))) +#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b))) +#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b))) +#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b))) +#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b))) + +#if 0 +#define _BGP_DCR_BIC (0x000) /* 0x000-0x1ff: BIC (includes MCCU functionality) */ +#define _BGP_DCR_BIC_END (_BGP_DCR_BIC + 0x1FF) /* 0x1ff: BIC (includes MCCU functionality) */ + +#define _BGP_DCR_SERDES (0x200) /* 0x200-0x3ff: Serdes Config */ +#define _BGP_DCR_SERDES_END (_BGP_DCR_SERDES + 0x1FF) /* 0x3ff: Serdes Config End */ + +#define _BGP_DCR_TEST (0x400) /* 0x400-0x47f: Test Interface */ +#define _BGP_DCR_TEST_END (_BGP_DCR_TEST + 0x07F) /* 0x400-0x47f: 
Test Interface End */ + +#define _BGP_DCR_L30 (0x500) /* 0x500-0x53f: L3-Cache 0 */ +#define _BGP_DCR_L30_END (_BGP_DCR_L30 + 0x03F) /* 0x53f: L3-Cache 0 End */ + +#define _BGP_DCR_L31 (0x540) /* 0x540-0x57f: L3-Cache 1 */ +#define _BGP_DCR_L31_END (_BGP_DCR_L31 + 0x03F) /* 0x57f: L3-Cache 1 End */ + +#define _BGP_DCR_XAUI (0x580) /* 0x580-0x5bf: XAUI config */ +#define _BGP_DCR_XAUI_END (_BGP_DCR_XAUI + 0x03F) /* 0x5bf: XAUI config End */ + +#define _BGP_DCR_SRAM (0x610) /* 0x610-0x61f: SRAM unit (Includes Lockbox functionality) */ +#define _BGP_DCR_SRAM_END (_BGP_DCR_SRAM + 0x00F) /* 0x61f: SRAM unit (Includes Lockbox functionality) */ + +#define _BGP_DCR_DEVBUS (0x620) /* 0x620-0x62f: DevBus Arbiter */ +#define _BGP_DCR_DEVBUS_END (_BGP_DCR_DEVBUS + 0x00F) /* 0x62f: DevBus Arbiter End */ + +#define _BGP_DCR_NETBUS (0x630) /* 0x630-0x63f: NetBus Arbiter */ +#define _BGP_DCR_NETBUS_END (_BGP_DCR_NETBUS + 0x00F) /* 0x63f: NetBus Arbiter End */ + +#define _BGP_DCR_DMAARB (0x640) /* 0x640-0x64f: DMA arbiter (former PLB slave) */ +#define _BGP_DCR_DMAARB_END (_BGP_DCR_DMAARB + 0x00F) /* 0x64f: DMA arbiter (former PLB slave) End */ + +#define _BGP_DCR_DCRARB (0x650) /* 0x650-0x65f: DCR arbiter */ +#define _BGP_DCR_DCRARB_END (_BGP_DCR_DCRARB + 0x00F) /* 0x65f: DCR arbiter End */ + +#define _BGP_DCR_GLOBINT (0x660) /* 0x660-0x66F: Global Interrupts */ +#define _BGP_DCR_GLOBINT_END (_BGP_DCR_GLOBINT + 0x00F) /* 0x66F: Global Interrupts End */ + +#define _BGP_DCR_CLOCKSTOP (0x670) /* 0x670-0x67F: Clock Stop */ +#define _BGP_DCR_CLOCKSTOP_END (_BGP_DCR_CLOCKSTOP + 0x00F) /* 0x67F: Clock Stop End */ + +#define _BGP_DCR_ENVMON (0x680) /* 0x670-0x67F: Environmental Monitor */ +#define _BGP_DCR_ENVMON_END (_BGP_DCR_ENVMON + 0x00F) /* 0x67F: Env Mon End */ + +#define _BGP_DCR_FPU (0x700) /* 0x700-0x77f: Hummer3 00/01/10/11 */ +#define _BGP_DCR_FPU_END (_BGP_DCR_FPU + 0x07F) /* 0x77f: Hummer3 00/01/10/11 End */ + +#define _BGP_DCR_L2 (0x780) /* 0x780-0x7ff: L2-Cache 00/01/10/11 
*/ +#define _BGP_DCR_L2_END (_BGP_DCR_L2 + 0x07F) /* 0x7ff: L2-Cache 00/01/10/11 End */ + +#define _BGP_DCR_SNOOP (0x800) /* 0x800-0xbff: Snoop 00/01/10/11 */ +#define _BGP_DCR_SNOOP0 (0x800) /* 0x800-0x8ff: Snoop 00 */ +#define _BGP_DCR_SNOOP1 (0x900) /* 0x900-0x9ff: Snoop 01 */ +#define _BGP_DCR_SNOOP2 (0xA00) /* 0xa00-0xaff: Snoop 10 */ +#define _BGP_DCR_SNOOP3 (0xB00) /* 0xb00-0xbff: Snoop 11 */ +#define _BGP_DCR_SNOOP_END (_BGP_DCR_SNOOP + 0x3FF) /* 0xbff: Snoop 00/01/10/11 End */ + +#define _BGP_DCR_COL (0xc00) /* 0xc00-0xc7f: Tree */ +#define _BGP_DCR_COL_END (_BGP_DCR_COL + 0x07F) /* 0xc7f: Tree End */ + +#define _BGP_DCR_TORUS (0xc80) /* 0xc80-0xcff: Torus */ +#define _BGP_DCR_TORUS_END (_BGP_DCR_TORUS + 0x07F) /* 0xcff: Torus End */ + +#define _BGP_DCR_DMA (0xd00) /* 0xd00-0xdff: DMA */ +#define _BGP_DCR_DMA_END (_BGP_DCR_DMA + 0x0FF) /* 0xdff: DMA End */ + +#define _BGP_DCR_DDR0 (0xe00) /* 0xe00-0xeff: DDR controller 0 */ +#define _BGP_DCR_DDR0_END (_BGP_DCR_DDR0 + 0x0FF) /* 0xeff: DDR controller 0 End */ + +#define _BGP_DCR_DDR1 (0xf00) /* 0xf00-0xfff: DDR controller 1 */ +#define _BGP_DCR_DDR1_END (_BGP_DCR_DDR1 + 0x0FF) /* 0xfff: DDR controller 1 End */ + +#endif + +/* + * Tree + */ + +#define _BGP_TRx_DI (0x00) /* Offset from Tree VCx for Data Injection (WO,Quad) */ +#define _BGP_TRx_HI (0x10) /* Offset from Tree VCx for Header Injection (WO,Word) */ +#define _BGP_TRx_DR (0x20) /* Offset from Tree VCx for Data Reception (RO,Quad) */ +#define _BGP_TRx_HR (0x30) /* Offset from Tree VCx for Header Reception (RO,Word) */ +#define _BGP_TRx_Sx (0x40) /* Offset from Tree VCx for Status (RO,Word) */ +#define _BGP_TRx_SO (0x50) /* Offset from Tree VCx for Status of Other VC (RO,Word) */ + +/* Virtual Addresses for Tree VC0 */ +#define _BGP_TR0_DI (_BGP_VA_COL0 | _BGP_TRx_DI) +#define _BGP_TR0_HI (_BGP_VA_COL0 | _BGP_TRx_HI) +#define _BGP_TR0_DR (_BGP_VA_COL0 | _BGP_TRx_DR) +#define _BGP_TR0_HR (_BGP_VA_COL0 | _BGP_TRx_HR) +#define _BGP_TR0_S0 (_BGP_VA_COL0 | 
_BGP_TRx_Sx) +#define _BGP_TR0_S1 (_BGP_VA_COL0 | _BGP_TRx_SO) + +/* Virtual Addresses for Tree VC1 */ +#define _BGP_TR1_DI (_BGP_VA_COL1 | _BGP_TRx_DI) +#define _BGP_TR1_HI (_BGP_VA_COL1 | _BGP_TRx_HI) +#define _BGP_TR1_DR (_BGP_VA_COL1 | _BGP_TRx_DR) +#define _BGP_TR1_HR (_BGP_VA_COL1 | _BGP_TRx_HR) +#define _BGP_TR1_S1 (_BGP_VA_COL1 | _BGP_TRx_Sx) +#define _BGP_TR1_S0 (_BGP_VA_COL1 | _BGP_TRx_SO) + +/* Packet Payload: fixed size for all Tree packets */ +#define _BGP_COL_PKT_MAX_BYTES (256) /* bytes in a tree packet */ +#define _BGP_COL_PKT_MAX_SHORT (128) +#define _BGP_COL_PKT_MAX_LONG (64) +#define _BGP_COL_PKT_MAX_LONGLONG (32) +#define _BGP_COL_PKT_MAX_QUADS (16) /* quads in a tree packet */ + + +/* Packet header */ +#define _BGP_TR_HDR_CLASS(x) _B4( 3,x) /* Packet class (virtual tree) */ +#define _BGP_TR_HDR_P2P _BN( 4) /* Point-to-point enable */ +#define _BGP_TR_HDR_IRQ _BN( 5) /* Interrupt request (at receiver) enable */ +#define _BGP_TR_HDR_OPCODE(x) _B3( 8,x) /* ALU opcode */ +#define _BGP_TR_OP_NONE 0x0 /* No operand. Use for ordinary routed packets. */ +#define _BGP_TR_OP_OR 0x1 /* Bitwise logical OR. */ +#define _BGP_TR_OP_AND 0x2 /* Bitwise logical AND. */ +#define _BGP_TR_OP_XOR 0x3 /* Bitwise logical XOR. */ +#define _BGP_TR_OP_MAX 0x5 /* Unsigned integer maximum. */ +#define _BGP_TR_OP_ADD 0x6 /* Unsigned integer addition. */ +#define _BGP_TR_HDR_OPSIZE(x) _B7(15,x) /* Operand size (# of 16-bit words minus 1) */ +#define _BGP_TR_HDR_TAG(x) _B14(29,x) /* User-specified tag (for ordinary routed packets only) */ +#define _BGP_TR_HDR_NADDR(x) _B24(29,x) /* Target address (for P2P packets only) */ +#define _BGP_TR_HDR_CSUM(x) _B2(31,x) /* Injection checksum mode */ +#define _BGP_TR_CSUM_NONE 0x0 /* Do not include packet in checksums. */ +#define _BGP_TR_CSUM_SOME 0x1 /* Include header in header checksum. Include all but */ + /* first quadword in payload checksum. */ +#define _BGP_TR_CSUM_CFG 0x2 /* Include header in header checksum. 
Include all but */ + /* specified number of 16-bit words in payload checksum. */ +#define _BGP_TR_CSUM_ALL 0x3 /* Include entire packet in checksums. */ + +/* Packet status */ +#define _BGP_TR_STAT_IPY_CNT(x) _B8( 7,x) /* Injection payload qword count */ +#define _BGP_TR_STAT_IHD_CNT(x) _B4(15,x) /* Injection header word count */ +#define _BGP_TR_STAT_RPY_CNT(x) _B8(23,x) /* Reception payload qword count */ +#define _BGP_TR_STAT_IRQ _BN(27) /* One or more reception headers with IRQ bit set */ +#define _BGP_TR_STAT_RHD_CNT(x) _B4(31,x) /* Reception header word count */ + +/* Tree Map of DCR Groupings */ +#define _BGP_DCR_TR_CLASS (_BGP_DCR_COL + 0x00) /* Class Definition Registers (R/W) */ +#define _BGP_DCR_TR_DMA (_BGP_DCR_COL + 0x0C) /* Network Port Diagnostic Memory Access Registers (R/W) */ +#define _BGP_DCR_TR_ARB (_BGP_DCR_COL + 0x10) /* Arbiter Control Registers (R/W) */ +#define _BGP_DCR_TR_CH0 (_BGP_DCR_COL + 0x20) /* Channel 0 Control Registers (R/W) */ +#define _BGP_DCR_TR_CH1 (_BGP_DCR_COL + 0x28) /* Channel 1 Control Registers (R/W) */ +#define _BGP_DCR_TR_CH2 (_BGP_DCR_COL + 0x30) /* Channel 2 Control Registers (R/W) */ +#define _BGP_DCR_TR_GLOB (_BGP_DCR_COL + 0x40) /* Global Registers (R/W) */ +#define _BGP_DCR_TR_REC (_BGP_DCR_COL + 0x44) /* Processor Reception Registers (R/W) */ +#define _BGP_DCR_TR_INJ (_BGP_DCR_COL + 0x48) /* Processor Injection Registers (R/W) */ +#define _BGP_DCR_TR_LCRC (_BGP_DCR_COL + 0x50) /* Link CRC's */ +#define _BGP_DCR_TR_ERR (_BGP_DCR_COL + 0x60) /* Internal error counters */ + + +/* Tree Class Registers */ +/* Note: each route descriptor register contains two class descriptors. "LO" will refer to the lower-numbered */ +/* of the two and "HI" will refer to the higher numbered. 
*/ +#define _BGP_DCR_TR_CLASS_RDR0 (_BGP_DCR_TR_CLASS + 0x00) /* CLASS: Route Descriptor Register for classes 0, 1 */ +#define _BGP_DCR_TR_CLASS_RDR1 (_BGP_DCR_TR_CLASS + 0x01) /* CLASS: Route Descriptor Register for classes 2, 3 */ +#define _BGP_DCR_TR_CLASS_RDR2 (_BGP_DCR_TR_CLASS + 0x02) /* CLASS: Route Descriptor Register for classes 4, 5 */ +#define _BGP_DCR_TR_CLASS_RDR3 (_BGP_DCR_TR_CLASS + 0x03) /* CLASS: Route Descriptor Register for classes 6, 7 */ +#define _BGP_DCR_TR_CLASS_RDR4 (_BGP_DCR_TR_CLASS + 0x04) /* CLASS: Route Descriptor Register for classes 8, 9 */ +#define _BGP_DCR_TR_CLASS_RDR5 (_BGP_DCR_TR_CLASS + 0x05) /* CLASS: Route Descriptor Register for classes 10, 11 */ +#define _BGP_DCR_TR_CLASS_RDR6 (_BGP_DCR_TR_CLASS + 0x06) /* CLASS: Route Descriptor Register for classes 12, 13 */ +#define _BGP_DCR_TR_CLASS_RDR7 (_BGP_DCR_TR_CLASS + 0x07) /* CLASS: Route Descriptor Register for classes 14, 15 */ +#define _TR_CLASS_RDR_LO_SRC2 _BN( 1) /* Class low, source channel 2 */ +#define _TR_CLASS_RDR_LO_SRC1 _BN( 2) /* Class low, source channel 1 */ +#define _TR_CLASS_RDR_LO_SRC0 _BN( 3) /* Class low, source channel 0 */ +#define _TR_CLASS_RDR_LO_TGT2 _BN( 5) /* Class low, target channel 2 */ +#define _TR_CLASS_RDR_LO_TGT1 _BN( 6) /* Class low, target channel 1 */ +#define _TR_CLASS_RDR_LO_TGT0 _BN( 7) /* Class low, target channel 0 */ +#define _TR_CLASS_RDR_LO_SRCL _BN(14) /* Class low, source local client (injection) */ +#define _TR_CLASS_RDR_LO_TGTL _BN(15) /* Class low, target local client (reception) */ +#define _TR_CLASS_RDR_HI_SRC2 _BN(17) /* Class high, source channel 2 */ +#define _TR_CLASS_RDR_HI_SRC1 _BN(18) /* Class high, source channel 1 */ +#define _TR_CLASS_RDR_HI_SRC0 _BN(19) /* Class high, source channel 0 */ +#define _TR_CLASS_RDR_HI_TGT2 _BN(21) /* Class high, target channel 2 */ +#define _TR_CLASS_RDR_HI_TGT1 _BN(22) /* Class high, target channel 1 */ +#define _TR_CLASS_RDR_HI_TGT0 _BN(23) /* Class high, target channel 0 */ +#define 
_TR_CLASS_RDR_HI_SRCL _BN(30) /* Class high, source local client (injection) */ +#define _TR_CLASS_RDR_HI_TGTL _BN(31) /* Class high, target local client (reception) */ +#define _BGP_DCR_TR_CLASS_ISRA (_BGP_DCR_TR_CLASS + 0x08) /* CLASS: Bits 0-31 of 64-bit idle pattern */ +#define _BGP_DCR_TR_CLASS_ISRB (_BGP_DCR_TR_CLASS + 0x09) /* CLASS: Bits 32-63 of 64-bit idle pattern */ + +/* Tree Network Port Diagnostic Memory Access Registers */ +/* Note: Diagnostic access to processor injection and reception fifos is through TR_REC and TR_INJ registers. */ +#define _BGP_DCR_TR_DMA_DMAA (_BGP_DCR_TR_DMA + 0x00) /* DMA: Diagnostic SRAM address */ +#define _TR_DMA_DMAA_TGT(x) _B3(21,x) /* Target */ +#define _TR_DMAA_TGT_RCV0 0x0 /* Channel 0 receiver */ +#define _TR_DMAA_TGT_RCV1 0x1 /* Channel 1 receiver */ +#define _TR_DMAA_TGT_RCV2 0x2 /* Channel 2 receiver */ +#define _TR_DMAA_TGT_SND0 0x4 /* Channel 0 sender */ +#define _TR_DMAA_TGT_SND1 0x5 /* Channel 1 sender */ +#define _TR_DMAA_TGT_SND2 0x6 /* Channel 2 sender */ +#define _TR_DMA_DMAA_VC(x) _B1(22,x) /* Virtual channel */ +#define _TR_DMA_DMAA_PCKT(x) _B2(24,x) /* Packet number */ +#define _TR_DMA_DMAA_WORD(x) _B7(31,x) /* Word offset within packet */ +#define _BGP_DCR_TR_DMA_DMAD (_BGP_DCR_TR_DMA + 0x01) /* DMA: Diagnostic SRAM data */ +#define _BGP_DCR_TR_DMA_DMADI (_BGP_DCR_TR_DMA + 0x02) /* DMA: Diagnostic SRAM data with address increment */ +#define _TR_DMA_DMAD_ECC(x) _B6(15,x) /* ECC */ +#define _TR_DMA_DMAD_DATA(x) _B16(31,x) /* Data */ +#define _BGP_DCR_TR_DMA_DMAH (_BGP_DCR_TR_DMA + 0x03) /* DMA: Diagnostic header access */ + +/* Tree Arbiter Control Registers */ +#define _BGP_DCR_TR_ARB_RCFG (_BGP_DCR_TR_ARB + 0x00) /* ARB: General router configuration */ +#define _TR_ARB_RCFG_SRC00 _BN( 0) /* Disable source channel 0, VC0 */ +#define _TR_ARB_RCFG_SRC01 _BN( 1) /* Disable source channel 0, VC1 */ +#define _TR_ARB_RCFG_TGT00 _BN( 2) /* Disable target channel 0, VC0 */ +#define _TR_ARB_RCFG_TGT01 _BN( 3) /* 
Disable target channel 0, VC1 */ +#define _TR_ARB_RCFG_SRC10 _BN( 4) /* Disable source channel 1, VC0 */ +#define _TR_ARB_RCFG_SRC11 _BN( 5) /* Disable source channel 1, VC1 */ +#define _TR_ARB_RCFG_TGT10 _BN( 6) /* Disable target channel 1, VC0 */ +#define _TR_ARB_RCFG_TGT11 _BN( 7) /* Disable target channel 1, VC1 */ +#define _TR_ARB_RCFG_SRC20 _BN( 8) /* Disable source channel 2, VC0 */ +#define _TR_ARB_RCFG_SRC21 _BN( 9) /* Disable source channel 2, VC1 */ +#define _TR_ARB_RCFG_TGT20 _BN(10) /* Disable target channel 2, VC0 */ +#define _TR_ARB_RCFG_TGT21 _BN(11) /* Disable target channel 2, VC1 */ +#define _TR_ARB_RCFG_LB2 _BN(25) /* Channel 2 loopback enable */ +#define _TR_ARB_RCFG_LB1 _BN(26) /* Channel 1 loopback enable */ +#define _TR_ARB_RCFG_LB0 _BN(27) /* Channel 0 loopback enable */ +#define _TR_ARB_RCFG_TOM(x) _B2(29,x) /* Timeout mode */ +#define _TR_RCFG_TOM_NONE 0x0 /* Disable. */ +#define _TR_RCFG_TOM_NRML 0x1 /* Normal mode, irq enabled. */ +#define _TR_RCFG_TOM_WD 0x2 /* Watchdog mode, irq enabled. */ +#define _TR_ARB_RCFG_MAN _BN(30) /* Manual mode (router is disabled). */ +#define _TR_ARB_RCFG_RST _BN(31) /* Full arbiter reset. 
*/ +#define _BGP_DCR_TR_ARB_RTO (_BGP_DCR_TR_ARB + 0x01) /* ARB: 32 MSBs of router timeout value */ +#define _BGP_DCR_TR_ARB_RTIME (_BGP_DCR_TR_ARB + 0x02) /* ARB: Value of router timeout counter */ +#define _BGP_DCR_TR_ARB_RSTAT (_BGP_DCR_TR_ARB + 0x03) /* ARB: General router status */ +#define _TR_ARB_RSTAT_REQ20 _BN( 0) /* Packet available in channel 2, VC0 */ +#define _TR_ARB_RSTAT_REQ10 _BN( 1) /* Packet available in channel 1, VC0 */ +#define _TR_ARB_RSTAT_REQ00 _BN( 2) /* Packet available in channel 0, VC0 */ +#define _TR_ARB_RSTAT_REQP0 _BN( 3) /* Packet available in local client, VC0 */ +#define _TR_ARB_RSTAT_REQ21 _BN( 4) /* Packet available in channel 2, VC1 */ +#define _TR_ARB_RSTAT_REQ11 _BN( 5) /* Packet available in channel 1, VC1 */ +#define _TR_ARB_RSTAT_REQ01 _BN( 6) /* Packet available in channel 0, VC1 */ +#define _TR_ARB_RSTAT_REQP1 _BN( 7) /* Packet available in local client, VC1 */ +#define _TR_ARB_RSTAT_FUL20 _BN( 8) /* Channel 2, VC0 is full */ +#define _TR_ARB_RSTAT_FUL10 _BN( 9) /* Channel 1, VC0 is full */ +#define _TR_ARB_RSTAT_FUL00 _BN(10) /* Channel 0, VC0 is full */ +#define _TR_ARB_RSTAT_FULP0 _BN(11) /* Local client, VC0 is full */ +#define _TR_ARB_RSTAT_FUL21 _BN(12) /* Channel 2, VC1 is full */ +#define _TR_ARB_RSTAT_FUL11 _BN(13) /* Channel 1, VC1 is full */ +#define _TR_ARB_RSTAT_FUL01 _BN(14) /* Channel 0, VC1 is full */ +#define _TR_ARB_RSTAT_FULP1 _BN(15) /* Local client, VC1 is full */ +#define _TR_ARB_RSTAT_MAT20 _BN(16) /* Channel 2, VC0 is mature */ +#define _TR_ARB_RSTAT_MAT10 _BN(17) /* Channel 1, VC0 is mature */ +#define _TR_ARB_RSTAT_MAT00 _BN(18) /* Channel 0, VC0 is mature */ +#define _TR_ARB_RSTAT_MATP0 _BN(19) /* Local client, VC0 is mature */ +#define _TR_ARB_RSTAT_MAT21 _BN(20) /* Channel 2, VC1 is mature */ +#define _TR_ARB_RSTAT_MAT11 _BN(21) /* Channel 1, VC1 is mature */ +#define _TR_ARB_RSTAT_MAT01 _BN(22) /* Channel 0, VC1 is mature */ +#define _TR_ARB_RSTAT_MATP1 _BN(23) /* Local client, VC1 is mature 
*/ +#define _TR_ARB_RSTAT_BSY20 _BN(24) /* Channel 2, VC0 is busy */ +#define _TR_ARB_RSTAT_BSY10 _BN(25) /* Channel 1, VC0 is busy */ +#define _TR_ARB_RSTAT_BSY00 _BN(26) /* Channel 0, VC0 is busy */ +#define _TR_ARB_RSTAT_BSYP0 _BN(27) /* Local client, VC0 is busy */ +#define _TR_ARB_RSTAT_BSY21 _BN(28) /* Channel 2, VC1 is busy */ +#define _TR_ARB_RSTAT_BSY11 _BN(29) /* Channel 1, VC1 is busy */ +#define _TR_ARB_RSTAT_BSY01 _BN(30) /* Channel 0, VC1 is busy */ +#define _TR_ARB_RSTAT_BSYP1 _BN(31) /* Local client, VC1 is busy */ +#define _BGP_DCR_TR_ARB_HD00 (_BGP_DCR_TR_ARB + 0x04) /* ARB: Next header, channel 0, VC0 */ +#define _BGP_DCR_TR_ARB_HD01 (_BGP_DCR_TR_ARB + 0x05) /* ARB: Next header, channel 0, VC1 */ +#define _BGP_DCR_TR_ARB_HD10 (_BGP_DCR_TR_ARB + 0x06) /* ARB: Next header, channel 1, VC0 */ +#define _BGP_DCR_TR_ARB_HD11 (_BGP_DCR_TR_ARB + 0x07) /* ARB: Next header, channel 1, VC1 */ +#define _BGP_DCR_TR_ARB_HD20 (_BGP_DCR_TR_ARB + 0x08) /* ARB: Next header, channel 2, VC0 */ +#define _BGP_DCR_TR_ARB_HD21 (_BGP_DCR_TR_ARB + 0x09) /* ARB: Next header, channel 2, VC1 */ +#define _BGP_DCR_TR_ARB_HDI0 (_BGP_DCR_TR_ARB + 0x0A) /* ARB: Next header, injection, VC0 */ +#define _BGP_DCR_TR_ARB_HDI1 (_BGP_DCR_TR_ARB + 0x0B) /* ARB: Next header, injection, VC1 */ +#define _BGP_DCR_TR_ARB_FORCEC (_BGP_DCR_TR_ARB + 0x0C) /* ARB: Force control for manual mode */ +#define _TR_ARB_FORCEC_CH0 _BN( 0) /* Channel 0 is a target */ +#define _TR_ARB_FORCEC_CH1 _BN( 1) /* Channel 1 is a target */ +#define _TR_ARB_FORCEC_CH2 _BN( 2) /* Channel 2 is a target */ +#define _TR_ARB_FORCEC_P _BN( 3) /* Local client is a target */ +#define _TR_ARB_FORCEC_ALU _BN( 4) /* ALU is a target */ +#define _TR_ARB_FORCEC_RT _BN( 5) /* Force route immediately */ +#define _TR_ARB_FORCEC_STK _BN( 6) /* Sticky route: always force route */ +#define _BGP_DCR_TR_ARB_FORCER (_BGP_DCR_TR_ARB + 0x0D) /* ARB: Forced route for manual mode */ +#define _TR_ARB_FORCER_CH20 _BN( 0) /* Channel 2 is a 
source for channel 0 */ +#define _TR_ARB_FORCER_CH10 _BN( 1) /* Channel 1 is a source for channel 0 */ +#define _TR_ARB_FORCER_CH00 _BN( 2) /* Channel 0 is a source for channel 0 */ +#define _TR_ARB_FORCER_CHP0 _BN( 3) /* Local client is a source for channel 0 */ +#define _TR_ARB_FORCER_CHA0 _BN( 4) /* ALU is a source for channel 0 */ +#define _TR_ARB_FORCER_VC0 _BN( 5) /* VC that is source for channel 0 */ +#define _TR_ARB_FORCER_CH21 _BN( 6) /* Channel 2 is a source for channel 1 */ +#define _TR_ARB_FORCER_CH11 _BN( 7) /* Channel 1 is a source for channel 1 */ +#define _TR_ARB_FORCER_CH01 _BN( 8) /* Channel 0 is a source for channel 1 */ +#define _TR_ARB_FORCER_CHP1 _BN( 9) /* Local client is a source for channel 1 */ +#define _TR_ARB_FORCER_CHA1 _BN(10) /* ALU is a source for channel 1 */ +#define _TR_ARB_FORCER_VC1 _BN(11) /* VC that is source for channel 1 */ +#define _TR_ARB_FORCER_CH22 _BN(12) /* Channel 2 is a source for channel 2 */ +#define _TR_ARB_FORCER_CH12 _BN(13) /* Channel 1 is a source for channel 2 */ +#define _TR_ARB_FORCER_CH02 _BN(14) /* Channel 0 is a source for channel 2 */ +#define _TR_ARB_FORCER_CHP2 _BN(15) /* Local client is a source for channel 2 */ +#define _TR_ARB_FORCER_CHA2 _BN(16) /* ALU is a source for channel 2 */ +#define _TR_ARB_FORCER_VC2 _BN(17) /* VC that is source for channel 2 */ +#define _TR_ARB_FORCER_CH2P _BN(18) /* Channel 2 is a source for local client */ +#define _TR_ARB_FORCER_CH1P _BN(19) /* Channel 1 is a source for local client */ +#define _TR_ARB_FORCER_CH0P _BN(20) /* Channel 0 is a source for local client */ +#define _TR_ARB_FORCER_CHPP _BN(21) /* Local client is a source for local client */ +#define _TR_ARB_FORCER_CHAP _BN(22) /* ALU is a source for local client */ +#define _TR_ARB_FORCER_VCP _BN(23) /* VC that is source for local client */ +#define _TR_ARB_FORCER_CH2A _BN(24) /* Channel 2 is a source for ALU */ +#define _TR_ARB_FORCER_CH1A _BN(25) /* Channel 1 is a source for ALU */ +#define 
_TR_ARB_FORCER_CH0A _BN(26) /* Channel 0 is a source for ALU */ +#define _TR_ARB_FORCER_CHPA _BN(27) /* Local client is a source for ALU */ +#define _TR_ARB_FORCER_CHAA _BN(28) /* ALU is a source for ALU */ +#define _TR_ARB_FORCER_VCA _BN(29) /* VC that is source for ALU */ +#define _BGP_DCR_TR_ARB_FORCEH (_BGP_DCR_TR_ARB + 0x0E) /* ARB: Forced header for manual mode */ +#define _BGP_DCR_TR_ARB_XSTAT (_BGP_DCR_TR_ARB + 0x0F) /* ARB: Extended router status */ +#define _TR_ARB_XSTAT_BLK20 _BN( 0) /* Request from channel 2, VC0 is blocked */ +#define _TR_ARB_XSTAT_BLK10 _BN( 1) /* Request from channel 1, VC0 is blocked */ +#define _TR_ARB_XSTAT_BLK00 _BN( 2) /* Request from channel 0, VC0 is blocked */ +#define _TR_ARB_XSTAT_BLKP0 _BN( 3) /* Request from local client, VC0 is blocked */ +#define _TR_ARB_XSTAT_BLK21 _BN( 4) /* Request from channel 2, VC1 is blocked */ +#define _TR_ARB_XSTAT_BLK11 _BN( 5) /* Request from channel 1, VC1 is blocked */ +#define _TR_ARB_XSTAT_BLK01 _BN( 6) /* Request from channel 0, VC1 is blocked */ +#define _TR_ARB_XSTAT_BLKP1 _BN( 7) /* Request from local client, VC1 is blocked */ +#define _TR_ARB_XSTAT_BSYR2 _BN( 8) /* Channel 2 receiver is busy */ +#define _TR_ARB_XSTAT_BSYR1 _BN( 9) /* Channel 1 receiver is busy */ +#define _TR_ARB_XSTAT_BSYR0 _BN(10) /* Channel 0 receiver is busy */ +#define _TR_ARB_XSTAT_BSYPI _BN(11) /* Local client injection is busy */ +#define _TR_ARB_XSTAT_BSYA _BN(12) /* ALU is busy */ +#define _TR_ARB_XSTAT_BSYS2 _BN(13) /* Channel 2 sender is busy */ +#define _TR_ARB_XSTAT_BSYS1 _BN(14) /* Channel 1 sender is busy */ +#define _TR_ARB_XSTAT_BSYS0 _BN(15) /* Channel 0 sender is busy */ +#define _TR_ARB_XSTAT_BSYPR _BN(16) /* Local client reception is busy */ +#define _TR_ARB_XSTAT_ARB_TO(x) _B15(31,x) /* Greedy-Arbitration timeout */ + +/* Tree Channel 0 Control Registers */ +#define _BGP_DCR_TR_CH0_RSTAT (_BGP_DCR_TR_CH0 + 0x00) /* CH0: Receiver status */ +#define _TR_RSTAT_RCVERR _BN( 0) /* Receiver error */ 
+#define _TR_RSTAT_LHEXP _BN( 1) /* Expect link header */ +#define _TR_RSTAT_PH0EXP _BN( 2) /* Expect packet header 0 */ +#define _TR_RSTAT_PH1EXP _BN( 3) /* Expect packet header 1 */ +#define _TR_RSTAT_PDRCV _BN( 4) /* Receive packet data */ +#define _TR_RSTAT_CWEXP _BN( 5) /* Expect packet control word */ +#define _TR_RSTAT_CSEXP _BN( 6) /* Expect packet checksum */ +#define _TR_RSTAT_SCRBRD0 _B8(14,0xff) /* VC0 fifo scoreboard */ +#define _TR_RSTAT_SCRBRD1 _B8(22,0xff) /* VC1 fifo scoreboard */ +#define _TR_RSTAT_RMTSTAT _B9(31,0x1ff) /* Remote status */ +#define _BGP_DCR_TR_CH0_RCTRL (_BGP_DCR_TR_CH0 + 0x01) /* CH0: Receiver control */ +#define _TR_RCTRL_FERR _BN( 0) /* Force receiver into error state */ +#define _TR_RCTRL_RST _BN( 1) /* Reset all internal pointers */ +#define _TR_RCTRL_FRZ0 _BN( 2) /* Freeze VC0 */ +#define _TR_RCTRL_FRZ1 _BN( 3) /* Freeze VC1 */ +#define _TR_RCTRL_RCVALL _BN( 4) /* Disable receiver CRC check and accept all packets */ +#define _BGP_DCR_TR_CH0_SSTAT (_BGP_DCR_TR_CH0 + 0x02) /* CH0: Sender status */ +#define _TR_SSTAT_SYNC _BN( 0) /* Phase of sender */ +#define _TR_SSTAT_ARB _BN( 1) /* Arbitrating */ +#define _TR_SSTAT_PH0SND _BN( 2) /* Sending packet header 0 */ +#define _TR_SSTAT_PH1SND _BN( 3) /* Sending packet header 1 */ +#define _TR_SSTAT_PDSND _BN( 4) /* Sending packet payload */ +#define _TR_SSTAT_CWSND _BN( 5) /* Sending packet control word */ +#define _TR_SSTAT_CSSND _BN( 6) /* Sending packet checksum */ +#define _TR_SSTAT_IDLSND _BN( 7) /* Sending idle packet */ +#define _TR_SSTAT_RPTR0 _B3(10,0x7) /* VC0 read pointer */ +#define _TR_SSTAT_WPTR0 _B3(13,0x7) /* VC0 write pointer */ +#define _TR_SSTAT_RPTR1 _B3(16,0x7) /* VC1 read pointer */ +#define _TR_SSTAT_WPTR1 _B3(19,0x7) /* VC1 write pointer */ +#define _BGP_DCR_TR_CH0_SCTRL (_BGP_DCR_TR_CH0 + 0x03) /* CH0: Sender control */ +#define _TR_SCTRL_SYNC _BN( 0) /* Force sender to send SYNC */ +#define _TR_SCTRL_IDLE _BN( 1) /* Force sender to send IDLE */ +#define 
_TR_SCTRL_RST _BN( 2) /* Reset all internal pointers */ +#define _TR_SCTRL_INVMSB _BN( 3) /* Invert MSB of class for loopback packets */ +#define _TR_SCTRL_OFF _BN( 4) /* Disable (black hole) the sender */ +#define _BGP_DCR_TR_CH0_TNACK (_BGP_DCR_TR_CH0 + 0x04) /* CH0: Tolerated delay from NACK to ACK status */ +#define _BGP_DCR_TR_CH0_CNACK (_BGP_DCR_TR_CH0 + 0x05) /* CH0: Time since last NACK received */ +#define _BGP_DCR_TR_CH0_TIDLE (_BGP_DCR_TR_CH0 + 0x06) /* CH0: Frequency to send IDLE packets */ +#define _BGP_DCR_TR_CH0_CIDLE (_BGP_DCR_TR_CH0 + 0x07) /* CH0: Time since last IDLE sent */ + +/* Tree Channel 1 Control Registers */ +/* Note: Register definitions are the same as those of channel 0. */ +#define _BGP_DCR_TR_CH1_RSTAT (_BGP_DCR_TR_CH1 + 0x00) /* CH1: Receiver status */ +#define _BGP_DCR_TR_CH1_RCTRL (_BGP_DCR_TR_CH1 + 0x01) /* CH1: Receiver control */ +#define _BGP_DCR_TR_CH1_SSTAT (_BGP_DCR_TR_CH1 + 0x02) /* CH1: Sender status */ +#define _BGP_DCR_TR_CH1_SCTRL (_BGP_DCR_TR_CH1 + 0x03) /* CH1: Sender control */ +#define _BGP_DCR_TR_CH1_TNACK (_BGP_DCR_TR_CH1 + 0x04) /* CH1: Tolerated delay from NACK to ACK status */ +#define _BGP_DCR_TR_CH1_CNACK (_BGP_DCR_TR_CH1 + 0x05) /* CH1: Time since last NACK received */ +#define _BGP_DCR_TR_CH1_TIDLE (_BGP_DCR_TR_CH1 + 0x06) /* CH1: Frequency to send IDLE packets */ +#define _BGP_DCR_TR_CH1_CIDLE (_BGP_DCR_TR_CH1 + 0x07) /* CH1: Time since last IDLE sent */ + +/* Tree Channel 2 Control Registers */ +/* Note: Register definitions are the same as those of channel 0. 
*/ +#define _BGP_DCR_TR_CH2_RSTAT (_BGP_DCR_TR_CH2 + 0x00) /* CH2: Receiver status */ +#define _BGP_DCR_TR_CH2_RCTRL (_BGP_DCR_TR_CH2 + 0x01) /* CH2: Receiver control */ +#define _BGP_DCR_TR_CH2_SSTAT (_BGP_DCR_TR_CH2 + 0x02) /* CH2: Sender status */ +#define _BGP_DCR_TR_CH2_SCTRL (_BGP_DCR_TR_CH2 + 0x03) /* CH2: Sender control */ +#define _BGP_DCR_TR_CH2_TNACK (_BGP_DCR_TR_CH2 + 0x04) /* CH2: Tolerated delay from NACK to ACK status */ +#define _BGP_DCR_TR_CH2_CNACK (_BGP_DCR_TR_CH2 + 0x05) /* CH2: Time since last NACK received */ +#define _BGP_DCR_TR_CH2_TIDLE (_BGP_DCR_TR_CH2 + 0x06) /* CH2: Frequency to send IDLE packets */ +#define _BGP_DCR_TR_CH2_CIDLE (_BGP_DCR_TR_CH2 + 0x07) /* CH2: Time since last IDLE sent */ + +/* Tree Global Registers */ +#define _BGP_DCR_TR_GLOB_FPTR (_BGP_DCR_TR_GLOB + 0x00) /* GLOB: Fifo Pointer Register */ +#define _TR_GLOB_FPTR_IPY0(x) _B3( 3,x) /* VC0 injection payload FIFO packet write pointer */ +#define _TR_GLOB_FPTR_IHD0(x) _B3( 7,x) /* VC0 injection header FIFO packet write pointer */ +#define _TR_GLOB_FPTR_IPY1(x) _B3(11,x) /* VC1 injection payload FIFO packet write pointer */ +#define _TR_GLOB_FPTR_IHD1(x) _B3(15,x) /* VC1 injection header FIFO packet write pointer */ +#define _TR_GLOB_FPTR_RPY0(x) _B3(19,x) /* VC0 reception payload FIFO packet read pointer */ +#define _TR_GLOB_FPTR_RHD0(x) _B3(23,x) /* VC0 reception header FIFO packet read pointer */ +#define _TR_GLOB_FPTR_RPY1(x) _B3(27,x) /* VC1 reception payload FIFO packet read pointer */ +#define _TR_GLOB_FPTR_RHD1(x) _B3(31,x) /* VC1 reception header FIFO packet read pointer */ +#define _BGP_DCR_TR_GLOB_NADDR (_BGP_DCR_TR_GLOB + 0x01) /* GLOB: Node Address Register */ +#define _TR_GLOB_NADDR(x) _B24(31,x) /* Node address */ +#define _BGP_DCR_TR_GLOB_VCFG0 (_BGP_DCR_TR_GLOB + 0x02) /* GLOB: VC0 Configuration Register (use macros below) */ +#define _BGP_DCR_TR_GLOB_VCFG1 (_BGP_DCR_TR_GLOB + 0x03) /* GLOB: VC1 Configuration Register */ +#define _TR_GLOB_VCFG_RCVALL _BN( 
0) /* Disable P2P reception filtering */ +#define _TR_GLOB_VCFG_CSUMX(x) _B8(15,x) /* Injection checksum mode 2 exclusion */ +#define _TR_GLOB_VCFG_RWM(x) _B3(23,x) /* Payload reception FIFO watermark */ +#define _TR_GLOB_VCFG_IWM(x) _B3(31,x) /* Payload injection FIFO watermark */ + +/* Tree Processor Reception Registers */ +#define _BGP_DCR_TR_REC_PRXF (_BGP_DCR_TR_REC + 0x00) /* REC: Receive Exception Flag Register */ +#define _BGP_DCR_TR_REC_PRXEN (_BGP_DCR_TR_REC + 0x01) /* REC: Receive Exception Enable Register */ +#define _TR_REC_PRX_APAR0 _BN( 8) /* P0 address parity error */ +#define _TR_REC_PRX_APAR1 _BN( 9) /* P1 address parity error */ +#define _TR_REC_PRX_ALIGN0 _BN(10) /* P0 address alignment error */ +#define _TR_REC_PRX_ALIGN1 _BN(11) /* P1 address alignment error */ +#define _TR_REC_PRX_ADDR0 _BN(12) /* P0 bad (unrecognized) address error */ +#define _TR_REC_PRX_ADDR1 _BN(13) /* P1 bad (unrecognized) address error */ +#define _TR_REC_PRX_COLL _BN(14) /* FIFO read collision error */ +#define _TR_REC_PRX_UE _BN(15) /* Uncorrectable SRAM ECC error */ +#define _TR_REC_PRX_PFU0 _BN(26) /* VC0 payload FIFO under-run error */ +#define _TR_REC_PRX_PFU1 _BN(27) /* VC1 payload FIFO under-run error */ +#define _TR_REC_PRX_HFU0 _BN(28) /* VC0 header FIFO under-run error */ +#define _TR_REC_PRX_HFU1 _BN(29) /* VC1 header FIFO under-run error */ +#define _TR_REC_PRX_WM0 _BN(30) /* VC0 payload FIFO above watermark */ +#define _TR_REC_PRX_WM1 _BN(31) /* VC1 payload FIFO above watermark */ +#define _BGP_DCR_TR_REC_PRDA (_BGP_DCR_TR_REC + 0x02) /* REC: Receive Diagnostic Address Register */ +#define _TR_PRDA_VC(x) _B1(21,x) /* Select VC to access */ +#define _TR_PRDA_MAC(x) _B1(22,x) /* Select SRAM macro to access */ +#define _TR_PRDA_LINE(x) _B7(29,x) /* Select line in SRAM or RA */ +#define _TR_PRDA_TGT(x) _B2(31,x) /* Select target sub-line or RA */ +#define _TR_PRDA_TGT_LO 0x0 /* Least significant word of SRAM */ +#define _TR_PRDA_TGT_HI 0x1 /* Most significant 
word of SRAM */ +#define _TR_PRDA_TGT_ECC 0x2 /* ECC syndrome of SRAM */ +#define _TR_PRDA_TGT_HDR 0x3 /* Header fifo */ +#define _BGP_DCR_TR_REC_PRDD (_BGP_DCR_TR_REC + 0x03) /* REC: Receive Diagnostic Data Register */ +#define _TR_PRDD_ECC(x) _B8(31,x) /* ECC */ +#define _TR_PRDD_DATA(x) (x) /* Data */ + +/* Tree Processor Injection Registers */ +#define _BGP_DCR_TR_INJ_PIXF (_BGP_DCR_TR_INJ + 0x00) /* INJ: Injection Exception Flag Register */ +#define _BGP_DCR_TR_INJ_PIXEN (_BGP_DCR_TR_INJ + 0x01) /* INJ: Injection Exception Enable Register */ +#define _TR_INJ_PIX_APAR0 _BN( 6) /* P0 address parity error */ +#define _TR_INJ_PIX_APAR1 _BN( 7) /* P1 address parity error */ +#define _TR_INJ_PIX_ALIGN0 _BN( 8) /* P0 address alignment error */ +#define _TR_INJ_PIX_ALIGN1 _BN( 9) /* P1 address alignment error */ +#define _TR_INJ_PIX_ADDR0 _BN(10) /* P0 bad (unrecognized) address error */ +#define _TR_INJ_PIX_ADDR1 _BN(11) /* P1 bad (unrecognized) address error */ +#define _TR_INJ_PIX_DPAR0 _BN(12) /* P0 data parity error */ +#define _TR_INJ_PIX_DPAR1 _BN(13) /* P1 data parity error */ +#define _TR_INJ_PIX_COLL _BN(14) /* FIFO write collision error */ +#define _TR_INJ_PIX_UE _BN(15) /* Uncorrectable SRAM ECC error */ +#define _TR_INJ_PIX_PFO0 _BN(25) /* VC0 payload FIFO overflow error */ +#define _TR_INJ_PIX_PFO1 _BN(26) /* VC1 payload FIFO overflow error */ +#define _TR_INJ_PIX_HFO0 _BN(27) /* VC0 header FIFO overflow error */ +#define _TR_INJ_PIX_HFO1 _BN(28) /* VC1 header FIFO overflow error */ +#define _TR_INJ_PIX_WM0 _BN(29) /* VC0 payload FIFO at or below watermark */ +#define _TR_INJ_PIX_WM1 _BN(30) /* VC1 payload FIFO at or below watermark */ +#define _TR_INJ_PIX_ENABLE _BN(31) /* Injection interface enable (if enabled in PIXEN) */ +#define _BGP_DCR_TR_INJ_PIDA (_BGP_DCR_TR_INJ + 0x02) /* INJ: Injection Diagnostic Address Register */ +/* Use _TR_PRDA_* defined above. 
*/ +#define _BGP_DCR_TR_INJ_PIDD (_BGP_DCR_TR_INJ + 0x03) /* INJ: Injection Diagnostic Data Register */ +/* Use _TR_PRDD_* defined above. */ +#define _BGP_DCR_TR_INJ_CSPY0 (_BGP_DCR_TR_INJ + 0x04) /* INJ: VC0 payload checksum */ +#define _BGP_DCR_TR_INJ_CSHD0 (_BGP_DCR_TR_INJ + 0x05) /* INJ: VC0 header checksum */ +#define _BGP_DCR_TR_INJ_CSPY1 (_BGP_DCR_TR_INJ + 0x06) /* INJ: VC1 payload checksum */ +#define _BGP_DCR_TR_INJ_CSHD1 (_BGP_DCR_TR_INJ + 0x07) /* INJ: VC1 header checksum */ + + +/* Link CRC's for the receivers 0..2 (vc0,1) */ +#define _BGP_DCR_TR_LCRC_R00 (_BGP_DCR_TR_LCRC + 0) +#define _BGP_DCR_TR_LCRC_R01 (_BGP_DCR_TR_LCRC + 1) +#define _BGP_DCR_TR_LCRC_R10 (_BGP_DCR_TR_LCRC + 2) +#define _BGP_DCR_TR_LCRC_R11 (_BGP_DCR_TR_LCRC + 3) +#define _BGP_DCR_TR_LCRC_R20 (_BGP_DCR_TR_LCRC + 4) +#define _BGP_DCR_TR_LCRC_R21 (_BGP_DCR_TR_LCRC + 5) + +/* Link CRC'c for the senders 0..2 (vc0,1) */ +#define _BGP_DCR_TR_LCRC_S00 (_BGP_DCR_TR_LCRC + 8) +#define _BGP_DCR_TR_LCRC_S01 (_BGP_DCR_TR_LCRC + 9) +#define _BGP_DCR_TR_LCRC_S10 (_BGP_DCR_TR_LCRC + 10) +#define _BGP_DCR_TR_LCRC_S11 (_BGP_DCR_TR_LCRC + 11) +#define _BGP_DCR_TR_LCRC_S20 (_BGP_DCR_TR_LCRC + 12) +#define _BGP_DCR_TR_LCRC_S21 (_BGP_DCR_TR_LCRC + 13) + +/* Internal error counters and thresholds */ +#define _BGP_DCR_TR_ERR_R0_CRC (_BGP_DCR_TR_ERR + 0x00) /* CH0: Receiver link CRC errors detected */ +#define _BGP_DCR_TR_ERR_R0_CE (_BGP_DCR_TR_ERR + 0x01) /* CH0: Receiver SRAM errors corrected */ +#define _BGP_DCR_TR_ERR_S0_RETRY (_BGP_DCR_TR_ERR + 0x02) /* CH0: Sender link retransmissions */ +#define _BGP_DCR_TR_ERR_S0_CE (_BGP_DCR_TR_ERR + 0x03) /* CH0: Sender SRAM errors corrected */ +#define _BGP_DCR_TR_ERR_R1_CRC (_BGP_DCR_TR_ERR + 0x04) /* CH1: Receiver link CRC errors detected */ +#define _BGP_DCR_TR_ERR_R1_CE (_BGP_DCR_TR_ERR + 0x05) /* CH1: Receiver SRAM errors corrected */ +#define _BGP_DCR_TR_ERR_S1_RETRY (_BGP_DCR_TR_ERR + 0x06) /* CH1: Sender link retransmissions */ +#define 
_BGP_DCR_TR_ERR_S1_CE (_BGP_DCR_TR_ERR + 0x07) /* CH1: Sender SRAM errors corrected */ +#define _BGP_DCR_TR_ERR_R2_CRC (_BGP_DCR_TR_ERR + 0x08) /* CH2: Receiver link CRC errors detected */ +#define _BGP_DCR_TR_ERR_R2_CE (_BGP_DCR_TR_ERR + 0x09) /* CH2: Receiver SRAM errors corrected */ +#define _BGP_DCR_TR_ERR_S2_RETRY (_BGP_DCR_TR_ERR + 0x0A) /* CH2: Sender link retransmissions */ +#define _BGP_DCR_TR_ERR_S2_CE (_BGP_DCR_TR_ERR + 0x0B) /* CH2: Sender SRAM errors corrected */ +#define _BGP_DCR_TR_ERR_INJ_SE (_BGP_DCR_TR_ERR + 0x0C) /* INJ: SRAM errors (correctable and uncorrectable) */ +#define _BGP_DCR_TR_ERR_REC_SE (_BGP_DCR_TR_ERR + 0x0D) /* REC: SRAM errors (correctable and uncorrectable) */ + +#define _BGP_DCR_TR_ERR_R0_CRC_T (_BGP_DCR_TR_ERR + 0x10) /* Interrupt thresholds for corresponding error */ +#define _BGP_DCR_TR_ERR_R0_CE_T (_BGP_DCR_TR_ERR + 0x11) /* counters. */ +#define _BGP_DCR_TR_ERR_S0_RETRY_T (_BGP_DCR_TR_ERR + 0x12) +#define _BGP_DCR_TR_ERR_S0_CE_T (_BGP_DCR_TR_ERR + 0x13) +#define _BGP_DCR_TR_ERR_R1_CRC_T (_BGP_DCR_TR_ERR + 0x14) +#define _BGP_DCR_TR_ERR_R1_CE_T (_BGP_DCR_TR_ERR + 0x15) +#define _BGP_DCR_TR_ERR_S1_RETRY_T (_BGP_DCR_TR_ERR + 0x16) +#define _BGP_DCR_TR_ERR_S1_CE_T (_BGP_DCR_TR_ERR + 0x17) +#define _BGP_DCR_TR_ERR_R2_CRC_T (_BGP_DCR_TR_ERR + 0x18) +#define _BGP_DCR_TR_ERR_R2_CE_T (_BGP_DCR_TR_ERR + 0x19) +#define _BGP_DCR_TR_ERR_S2_RETRY_T (_BGP_DCR_TR_ERR + 0x1A) +#define _BGP_DCR_TR_ERR_S2_CE_T (_BGP_DCR_TR_ERR + 0x1B) +#define _BGP_DCR_TR_ERR_INJ_SE_T (_BGP_DCR_TR_ERR + 0x1C) +#define _BGP_DCR_TR_ERR_REC_SE_T (_BGP_DCR_TR_ERR + 0x1D) + +/* For _bgp_tree_configure_class */ +#define _BGP_COL_RDR_NUM (16) /* classes are 0..15 */ + +/* The following interface allows for fine-grain control of the RDR register */ +/* contents. Use bit-wise OR'd together to create a route specification. 
*/ +#define _BGP_COL_RDR_SRC0 (0x1000) /* Bit Number 3 (MSb is bit number 0) */ +#define _BGP_COL_RDR_SRC1 (0x2000) /* Bit Number 2 */ +#define _BGP_COL_RDR_SRC2 (0x4000) /* Bit Number 1 */ +#define _BGP_COL_RDR_SRCL (0x0002) /* Bit Number 14 */ +#define _BGP_COL_RDR_TGT0 (0x0100) /* Bit Number 7 */ +#define _BGP_COL_RDR_TGT1 (0x0200) /* Bit Number 6 */ +#define _BGP_COL_RDR_TGT2 (0x0400) /* Bit Number 5 */ +#define _BGP_COL_RDR_TGTL (0x0001) /* Bit Number 15 */ + +/* OR of all valid Source and Target bits for SrtTgtEnable validation. */ +#define _BGP_COL_RDR_ACCEPT (0x7703) + + + + + + + + + + + + + + +/********************************************************************** + * + * Torus + * + **********************************************************************/ + +#define _BGP_DCR_DMA_NUM_VALID_ADDR 8 /* g range */ +#define _BGP_DCR_iDMA_NUM_TS_FIFO_WM 2 /* j range */ +#define _BGP_DCR_rDMA_NUM_TS_FIFO_WM 4 /* p range */ +#define _BGP_DCR_iDMA_NUM_FIFO_REGS 4 /* i range */ +#define _BGP_DCR_iDMA_NUM_FIFO_MAP_REGS 32 /* k range */ + + +/* use g for repeated 8X, i repeated 4x, j repeated 2X, k repeated 32x, p repeated 4x */ + +/* ------------------- */ +/* ---- Controls ----- */ +/* ------------------- */ + +#define _BGP_DCR_DMA_RESET (_BGP_DCR_DMA+0x00) /* All bits reset to 1. 
*/ +#define _DMA_RESET_DCR _BN( 0) /* Reset the DMA's DCR unit */ +#define _DMA_RESET_PQUE _BN( 1) /* Reset the DMA's Processor Queue unit */ +#define _DMA_RESET_IMFU _BN( 2) /* Reset the DMA's Injection Memory Fifo/Counter Unit */ +#define _DMA_RESET_RMFU _BN( 3) /* Reset the DMA's Reception Memory Fifo/Counter Unit */ +#define _DMA_RESET_LF _BN( 4) /* Reset the DMA's Local Fifo */ +#define _DMA_RESET_ITIU _BN( 5) /* Reset the DMA's Injection Torus Interface Unit */ +#define _DMA_RESET_ICONU _BN( 6) /* Reset the DMA's Injection Transfer Control Unit */ +#define _DMA_RESET_IDAU _BN( 7) /* Reset the DMA's Injection Data Alignment Unit */ +#define _DMA_RESET_IMIU _BN( 8) /* Reset the DMA's Injection L3 Memory Interface Unit */ +#define _DMA_RESET_RTIU _BN( 9) /* Reset the DMA's Reception Torus Interface Unit */ +#define _DMA_RESET_RCONU _BN(10) /* Reset the DMA's Reception Transfer Control Unit */ +#define _DMA_RESET_RDAU _BN(11) /* Reset the DMA's Reception Data Alignment Unit */ +#define _DMA_RESET_RMIU _BN(12) /* Reset the DMA's Reception L3 Memory Interface Unit */ +#define _DMA_RESET_PF _BN(13) /* Reset the DMA's Torus Prefetch Unit */ + /* 14-30 reserved. */ +#define _DMA_RESET_LNKCHK _BN(31) /* Reset the DMA's Torus Link Packet Capture Unit */ + +#define _BGP_DCR_DMA_BASE_CONTROL (_BGP_DCR_DMA+0x01) +#define _DMA_BASE_CONTROL_USE_DMA _BN( 0) /* Use DMA and *not* the Torus if 1, reset state is 0. 
*/ +#define _DMA_BASE_CONTROL_STORE_HDR _BN( 1) /* Store DMA Headers in Reception Header Fifo (debugging) */ +#define _DMA_BASE_CONTROL_PF_DIS _BN( 2) /* Disable Torus Prefetch Unit (should be 0) */ +#define _DMA_BASE_CONTROL_L3BURST_EN _BN( 3) /* Enable L3 Burst when 1 (should be enabled, except for debugging) */ +#define _DMA_BASE_CONTROL_ITIU_EN _BN( 4) /* Enable Torus Injection Data Transfer Unit (never make this zero) */ +#define _DMA_BASE_CONTROL_RTIU_EN _BN( 5) /* Enable Torus Reception Data Transfer Unit */ +#define _DMA_BASE_CONTROL_IMFU_EN _BN( 6) /* Enable DMA Injection Fifo Unit Arbiter */ +#define _DMA_BASE_CONTROL_RMFU_EN _BN( 7) /* Enable DMA Reception fifo Unit Arbiter */ +#define _DMA_BASE_CONTROL_L3PF_DIS _BN( 8) /* Disable L3 Read Prefetch (should be 0) */ + /* 9..27 reserved. */ +#define _DMA_BASE_CONTROL_REC_FIFO_FULL_STOP_RDMA _BN( 28) /* DD2 Only, ECO 777, RDMA stops when fifo is full */ +#define _DMA_BASE_CONTROL_REC_FIFO_CROSSTHRESH_NOTSTICKY _BN( 29) /* DD2 Only, ECO 777, Rec. Fifo Threshold crossed is not sticky */ +#define _DMA_BASE_CONTROL_INJ_FIFO_CROSSTHRESH_NOTSTICKY _BN( 30) /* DD2 Only, ECO 777, Inj. Fifo Threshold crossed is not sticky */ + /* 31 - ECO 653, leave at 0 */ +#define _BGP_DCR_DMA_BASE_CONTROL_INIT ( _DMA_BASE_CONTROL_USE_DMA | \ + _DMA_BASE_CONTROL_L3BURST_EN | \ + _DMA_BASE_CONTROL_ITIU_EN | \ + _DMA_BASE_CONTROL_RTIU_EN | \ + _DMA_BASE_CONTROL_IMFU_EN | \ + _DMA_BASE_CONTROL_RMFU_EN) + +/* g in the interval [0:7]: */ +/* 32bit 16Byte aligned Physical Addresses containing (0..3 of UA | 0..27 of PA). 
*/ +#define _BGP_DCR_iDMA_MIN_VALID_ADDR(g) (_BGP_DCR_DMA+((2*(g))+0x02)) +#define _BGP_DCR_iDMA_MAX_VALID_ADDR(g) (_BGP_DCR_DMA+((2*(g))+0x03)) + +#define _BGP_DCR_iDMA_INJ_RANGE_TLB (_BGP_DCR_DMA+0x12) +#define _iDMA_INT_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4)) /* (oops typo) 'r' in {0..7} Bit 0 of each range is L3 Cache Inhibit */ +#define _iDMA_INT_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4)) /* (oops typo) 'r' in {0..7} Bit 1 of each range is L3 ScratchPad. */ +#define _iDMA_INJ_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4)) /* 'r' in {0..7} Bit 0 of each range is L3 Cache Inhibit */ +#define _iDMA_INJ_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4)) /* 'r' in {0..7} Bit 1 of each range is L3 ScratchPad. */ + /* Bits 2,3 of each range are reserved. */ + +#define _BGP_DCR_rDMA_REC_RANGE_TLB (_BGP_DCR_DMA+0x13) +#define _rDMA_REC_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4)) /* 'r' in {0..7} Bit 0 of each range is L3 Cache Inhibit */ +#define _rDMA_REC_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4)) /* 'r' in {0..7} Bit 1 of each range is L3 ScratchPad. */ + +/* g in the interval [0:7] */ +/* 32bit 16Byte aligned Physical Addresses containing (0..3 of UA | 0..27 of PA). 
*/ +#define _BGP_DCR_rDMA_MIN_VALID_ADDR(g) (_BGP_DCR_DMA+((2*(g))+0x14)) +#define _BGP_DCR_rDMA_MAX_VALID_ADDR(g) (_BGP_DCR_DMA+((2*(g))+0x15)) + +/* j in the interval [0:1] */ +#define _BGP_DCR_iDMA_TS_FIFO_WM(j) (_BGP_DCR_DMA+(0x24+(j))) +#define _iDMA_TS_FIFO_WM_N0(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_iDMA_TORUS_FIFO_WM(0), should be set to decimal 20 */ +#define _iDMA_TS_FIFO_WM_N1(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_iDMA_TORUS_FIFO_WM(0), should be set to decimal 20 */ +#define _iDMA_TS_FIFO_WM_N2(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_iDMA_TORUS_FIFO_WM(0), should be set to decimal 20 */ +#define _iDMA_TS_FIFO_WM_P0(x) _B6(31,(x)) /* bit {26..31} of _BGP_DCR_iDMA_TORUS_FIFO_WM(0), should be set to decimal 20 */ +#define _iDMA_TS_FIFO_WM_N3(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_iDMA_TORUS_FIFO_WM(1), should be set to decimal 20 */ +#define _iDMA_TS_FIFO_WM_N4(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_iDMA_TORUS_FIFO_WM(1), should be set to decimal 20 */ +#define _iDMA_TS_FIFO_WM_N5(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_iDMA_TORUS_FIFO_WM(1), should be set to decimal 20 */ +#define _iDMA_TS_FIFO_WM_P1(x) _B6(31,(x)) /* bit {26..31} of _BGP_DCR_iDMA_TORUS_FIFO_WM(1), should be set to decimal 20 */ + +#define _iDMA_TS_FIFO_WM0_INIT (_iDMA_TS_FIFO_WM_N0(20) | \ + _iDMA_TS_FIFO_WM_N1(20) | \ + _iDMA_TS_FIFO_WM_N2(20) | \ + _iDMA_TS_FIFO_WM_P0(20)) +#define _iDMA_TS_FIFO_WM1_INIT (_iDMA_TS_FIFO_WM_N3(20) | \ + _iDMA_TS_FIFO_WM_N4(20) | \ + _iDMA_TS_FIFO_WM_N5(20) | \ + _iDMA_TS_FIFO_WM_P1(20)) + +#define _BGP_DCR_iDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY (_BGP_DCR_DMA+0x26) +#define _iDMA_LOCAL_FIFO_WM(x) _B7(7,(x)) /* bit {1..7} of _BGP_DCR_iDMA_LOCAL_FIFO_WM_RPT_CNT, set to decimal 55, 0x37 */ +#define _iDMA_HP_INJ_FIFO_RPT_CNT(x) _B4(11,(x)) /* bit {8..11} dma repeat count for using torus high priority injection fifo */ +#define _iDMA_NP_INJ_FIFO_RPT_CNT(x) _B4(15,(x)) /* bit {12..15} dma repeat count for using torus normal priority injection fifo 
*/ +#define _iDMA_INJ_DELAY(x) _B4(23,(x)) /* bit {20..23} dma delay this amount of clock_x2 cycles before injecting next packet */ + +#define _iDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY_INIT (_iDMA_LOCAL_FIFO_WM(55) | \ + _iDMA_HP_INJ_FIFO_RPT_CNT(0) | \ + _iDMA_NP_INJ_FIFO_RPT_CNT(0) | \ + _iDMA_INJ_DELAY(0)) + +/* p in the interval [0:3] */ +#define _BGP_DCR_rDMA_TS_FIFO_WM(p) (_BGP_DCR_DMA+(0x27+(p))) +#define _rDMA_TS_FIFO_WM_G0N0(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_rDMA_TORUS_FIFO_WM(0), must be 0 */ +#define _rDMA_TS_FIFO_WM_G0N1(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_rDMA_TORUS_FIFO_WM(0), must be 0 */ +#define _rDMA_TS_FIFO_WM_G0N2(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_rDMA_TORUS_FIFO_WM(0), must be 0 */ +#define _rDMA_TS_FIFO_WM_G0N3(x) _B6(31,(x)) /* bit {26..31} of _BGP_DCR_rDMA_TORUS_FIFO_WM(0), must be 0 */ +#define _rDMA_TS_FIFO_WM_G0N4(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_rDMA_TORUS_FIFO_WM(1), must be 0 */ +#define _rDMA_TS_FIFO_WM_G0N5(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_rDMA_TORUS_FIFO_WM(1), must be 0 */ +#define _rDMA_TS_FIFO_WM_G0P(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_rDMA_TORUS_FIFO_WM(1), must be 0 */ +#define _rDMA_TS_FIFO_WM_G1N0(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_rDMA_TORUS_FIFO_WM(2), must be 0 */ +#define _rDMA_TS_FIFO_WM_G1N1(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_rDMA_TORUS_FIFO_WM(2), must be 0 */ +#define _rDMA_TS_FIFO_WM_G1N2(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_rDMA_TORUS_FIFO_WM(2), must be 0 */ +#define _rDMA_TS_FIFO_WM_G1N3(x) _B6(31,(x)) /* bit {26..31} of _BGP_DCR_rDMA_TORUS_FIFO_WM(2), must be 0 */ +#define _rDMA_TS_FIFO_WM_G1N4(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_rDMA_TORUS_FIFO_WM(3), must be 0 */ +#define _rDMA_TS_FIFO_WM_G1N5(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_rDMA_TORUS_FIFO_WM(3), must be 0 */ +#define _rDMA_TS_FIFO_WM_G1P(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_rDMA_TORUS_FIFO_WM(3), must be 0 */ + +#define _rDMA_TS_FIFO_WM0_INIT (_rDMA_TS_FIFO_WM_G0N0(0) | \ + 
_rDMA_TS_FIFO_WM_G0N1(0) | \ + _rDMA_TS_FIFO_WM_G0N2(0) | \ + _rDMA_TS_FIFO_WM_G0N3(0)) +#define _rDMA_TS_FIFO_WM1_INIT (_rDMA_TS_FIFO_WM_G0N4(0) | \ + _rDMA_TS_FIFO_WM_G0N5(0) | \ + _rDMA_TS_FIFO_WM_G0P(0)) +#define _rDMA_TS_FIFO_WM2_INIT (_rDMA_TS_FIFO_WM_G1N0(0) | \ + _rDMA_TS_FIFO_WM_G1N1(0) | \ + _rDMA_TS_FIFO_WM_G1N2(0) | \ + _rDMA_TS_FIFO_WM_G1N3(0)) +#define _rDMA_TS_FIFO_WM3_INIT (_rDMA_TS_FIFO_WM_G1N4(0) | \ + _rDMA_TS_FIFO_WM_G1N5(0) | \ + _rDMA_TS_FIFO_WM_G1P(0)) + +#define _BGP_DCR_rDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY (_BGP_DCR_DMA+0x2b) +#define _rDMA_LOCAL_FIFO_WM(x) _B7(7,(x)) /* bit {1..7}, local fifo watermark, must be 0 */ +#define _rDMA_HP_REC_FIFO_RPT_CNT(x) _B4(11,(x)) /* bit {8..11}, dma repeat count for torus high priority reception fifos */ +#define _rDMA_NP_REC_FIFO_RPT_CNT(x) _B4(15,(x)) /* bit {12..15}, dma repeat count for torus normal priority reception fifos */ +#define _rDMA_DELAY(x) _B4(23,(x)) /* bit {20..23}, dma delay this amount of clock_x2 cycles between packets */ + +#define _rDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY_INIT (_rDMA_LOCAL_FIFO_WM(0) | \ + _rDMA_HP_REC_FIFO_RPT_CNT(0) | \ + _rDMA_NP_REC_FIFO_RPT_CNT(0) | \ + _rDMA_DELAY(0)) + +/* i in the interval [0:3] */ +#define _BGP_DCR_iDMA_FIFO_ENABLE(i) (_BGP_DCR_DMA+(0x2c+(i))) /* each bit, if '1', enables an injection fifo */ +#define _BGP_DCR_rDMA_FIFO_ENABLE (_BGP_DCR_DMA+0x30) /* each bit, if '1', enables a reception fifo */ +#define _BGP_DCR_rDMA_FIFO_ENABLE_HEADER (_BGP_DCR_DMA+0x31) +#define _rDMA_FIFO_ENABLE_HEADER0 _BN(28) +#define _rDMA_FIFO_ENABLE_HEADER1 _BN(29) +#define _rDMA_FIFO_ENABLE_HEADER2 _BN(30) +#define _rDMA_FIFO_ENABLE_HEADER3 _BN(31) + +/* i in the interval [0:3] */ +#define _BGP_DCR_iDMA_FIFO_PRIORITY(i) (_BGP_DCR_DMA+(0x32+(i))) +#define _BGP_DCR_iDMA_FIFO_RGET_THRESHOLD (_BGP_DCR_DMA+0x36) +#define _BGP_DCR_iDMA_SERVICE_QUANTA (_BGP_DCR_DMA+0x37) +#define _iDMA_SERVICE_QUANTA_HP(x) _B16(15,(x)) +#define _iDMA_SERVICE_QUANTA_NP(x) _B16(31,(x)) +#define 
_iDMA_SERVICE_QUANTA_INIT (_iDMA_SERVICE_QUANTA_HP(0) | _iDMA_SERVICE_QUANTA_NP(0)) + +#define _BGP_DCR_rDMA_FIFO_TYPE (_BGP_DCR_DMA+0x38) +#define _BGP_DCR_rDMA_FIFO_TYPE_HEADER (_BGP_DCR_DMA+0x39) +#define _rDMA_FIFO_TYPE_HEADER0 _BN(28) +#define _rDMA_FIFO_TYPE_HEADER1 _BN(29) +#define _rDMA_FIFO_TYPE_HEADER2 _BN(30) +#define _rDMA_FIFO_TYPE_HEADER3 _BN(31) +#define _BGP_DCR_rDMA_FIFO_THRESH0 (_BGP_DCR_DMA+0x3a) +#define _BGP_DCR_rDMA_FIFO_THRESH1 (_BGP_DCR_DMA+0x3b) + +/* k in the interval [0:31] */ +#define _BGP_DCR_iDMA_TS_INJ_FIFO_MAP(k) (_BGP_DCR_DMA+(0x3c+(k))) /* 8 bits for every dma injection fifo */ +/* @ Dong, for MG, is the following line good? */ +/* j in the interval [0:3] */ +#define _iDMA_TS_INJ_FIFO_MAP_FIELD(j, x) _B8((7+(j)*8), (x)) +/* i in the interval [0:3] */ +#define _BGP_DCR_iDMA_LOCAL_COPY(i) (_BGP_DCR_DMA+(0x5c+(i))) /* one bit for every dma injection fifo */ + +/* XY = X, Y */ +/* ZHL = Z, High Priority, Local Copy */ +#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_XY (_BGP_DCR_DMA+0x60) /* torus recv group 0, (pid0, pid1) = "00" */ +#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_ZHL (_BGP_DCR_DMA+0x61) +#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_XY (_BGP_DCR_DMA+0x62) +#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_ZHL (_BGP_DCR_DMA+0x63) +#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_XY (_BGP_DCR_DMA+0x64) +#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_ZHL (_BGP_DCR_DMA+0x65) +#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_XY (_BGP_DCR_DMA+0x66) +#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_ZHL (_BGP_DCR_DMA+0x67) +#define _rDMA_TS_REC_FIFO_MAP_XP(x) _B8(7,(x)) +#define _rDMA_TS_REC_FIFO_MAP_XM(x) _B8(15,(x)) +#define _rDMA_TS_REC_FIFO_MAP_YP(x) _B8(23,(x)) +#define _rDMA_TS_REC_FIFO_MAP_YM(x) _B8(31,(x)) +#define _rDMA_TS_REC_FIFO_MAP_ZP(x) _B8(7,(x)) +#define _rDMA_TS_REC_FIFO_MAP_ZM(x) _B8(15,(x)) +#define _rDMA_TS_REC_FIFO_MAP_HIGH(x) _B8(23,(x)) +#define _rDMA_TS_REC_FIFO_MAP_LOCAL(x) _B8(31,(x)) + +/* ii in the interval 
[0:3] group 0, group 1, ..., group 3 */ +#define _BGP_DCR_rDMA_FIFO_CLEAR_MASK(ii) (_BGP_DCR_DMA+(0x68+(ii))) +#define _rDMA_FIFO_CLEAR_MASK0_INIT 0xFF000000 +#define _rDMA_FIFO_CLEAR_MASK1_INIT 0x00FF0000 +#define _rDMA_FIFO_CLEAR_MASK2_INIT 0x0000FF00 +#define _rDMA_FIFO_CLEAR_MASK3_INIT 0x000000FF +#define _BGP_DCR_rDMA_FIFO_HEADER_CLEAR_MASK (_BGP_DCR_DMA+0x6c) +#define _rDMA_FIFO_HEADER_CLEAR_MASK_INIT 0x08040201 + +/* g in the interval [0:3] group 0, group 1, group2, and group 3 */ +#define _BGP_DCR_iDMA_FIFO_INT_ENABLE_GROUP(g) (_BGP_DCR_DMA+(0x6d+(g))) +/* t in the interval [0:3] type 0, type 1, ..., type 3 */ +#define _BGP_DCR_rDMA_FIFO_INT_ENABLE_TYPE(t) (_BGP_DCR_DMA+(0x71+(t))) +#define _BGP_DCR_rDMA_HEADER_FIFO_INT_ENABLE (_BGP_DCR_DMA+0x75) +#define _rDMA_HEADER_HEADER_FIFO_INT_ENABLE_TYPE(t,x) _B4((7+(t)*8), (x)) + +/* g in the interval [0:3] group 0, group 1, ..., group 3 */ +#define _BGP_DCR_iDMA_COUNTER_INT_ENABLE_GROUP(g) (_BGP_DCR_DMA+(0x76+(g))) + +/* g in the interval [0:3] group 0, group 1, ..., group 3 */ +#define _BGP_DCR_rDMA_COUNTER_INT_ENABLE_GROUP(g) (_BGP_DCR_DMA+(0x7a+(g))) + +/* ---------------------------- */ +/* ---- Fatal Error Enables ----- */ +/* ---------------------------- */ +/* e in the interval [0:3], bit definition in the fatal errors at 0x93 - 0x96 */ +#define _BGP_DCR_DMA_FATAL_ERROR_ENABLE(e) (_BGP_DCR_DMA +(0x7e +(e))) + +/* ------------------------------- */ +/* ---- Backdoor Access Regs ----- */ +/* ------------------------------- */ +#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_CTRL (_BGP_DCR_DMA+0x82) +#define _DMA_LF_IMFU_DESC_BD_CTRL_ENABLE _BN(0) /* if '1', enable backdoor read/write */ +#define _DMA_LF_IMFU_DESC_BD_CTRL_NOECC _BN(1) /* if '1', do not do ECC on backdoor read/write */ +#define _DMA_LF_IMFU_DESC_BD_CTRL_RD_REQ _BN(2) /* if '1', do read */ +#define _DMA_LF_IMFU_DESC_BD_CTRL_WR_REQ _BN(3) /* if '1', do write */ +#define _DMA_LF_IMFU_DESC_BD_CTRL_IMFU_SEL _BN(4) /* unit select, '0' local fifo, '1' imfu 
 descriptor */ +#define _DMA_LF_IMFU_DESC_BD_CTRL_LF_ADDR(x) _B7(15,(x)) /* 7 bit sram address for local fifo */ +#define _DMA_LF_IMFU_DESC_BD_CTRL_IMFU_ADDR(x) _B8(15,(x)) /* 8 bit sram address for imfu descriptor */ +#define _DMA_LF_IMFU_DESC_BD_CTRL_WR_ECC0(x) _B8(23,(x)) /* 8 bit write ECC for data bits 0 to 63 */ +#define _DMA_LF_IMFU_DESC_BD_CTRL_WR_ECC1(x) _B8(31,(x)) /* 8 bit write ECC for data bits 64 to 127 */ +/* i in the interval [0:3] */ +#define _BGP_DCR_DMA_LF_IMFU_DESC_BACKDOOR_WR_DATA(i) (_BGP_DCR_DMA+(0x83+(i))) /* 128 bit backdoor write data */ +#define _BGP_DCR_DMA_ARRAY_BD_CTRL (_BGP_DCR_DMA+0x87) /* fifo/counter array backdoor control */ +#define _DMA_ARRAY_BD_CTRL_ENABLE _BN(0) +#define _DMA_ARRAY_BD_CTRL_RD_SEL_IMFU_FIFO _B2(2,0) /* unit select for backdoor read */ +#define _DMA_ARRAY_BD_CTRL_RD_SEL_IMFU_COUNTER _B2(2,1) +#define _DMA_ARRAY_BD_CTRL_RD_SEL_RMFU_FIFO _B2(2,2) +#define _DMA_ARRAY_BD_CTRL_RD_SEL_RMFU_COUNTER _B2(2,3) +#define _DMA_ARRAY_BD_CTRL_WR_ECC(x) _B7(15,(x)) + +/* ------------------------------------- */ +/* ---- Torus Link Checker Control ----- */ +/* ------------------------------------- */ +#define _BGP_DCR_DMA_TS_LINK_CHK_CTRL (_BGP_DCR_DMA+0x88) +#define _DMA_TS_LINK_CHK_CTRL_SEL(x) _B3(2,(x)) /* 0 - xp, 1 - xm, 2 - yp, 3 - ym, 4 - zp, 5 - zm, 6, 7 disable */ +#define _DMA_TS_LINK_CHK_CTRL_RW_ENABLE _BN(8) /* if 1, enable read/write to link checker internal sram */ +#define _DMA_TS_LINK_CHK_CTRL_WR_REQ _BN(12) +#define _DMA_TS_LINK_CHK_CTRL_RD_REQ _BN(13) +#define _DMA_TS_LINK_CHK_CTRL_ADDR(x) _B10(23,(x)) +#define _DMA_TS_LINK_CHK_CTRL_WR_DATA(x) _B8(31,(x)) +#define _DMA_TS_LINK_CHK_BAD_OFFSET (0) /* sram address where bad packet starts */ +#define _DMA_TS_LINK_CHK_GOOD_OFFSET (320) /* sram address where good packet starts */ + + +/* -------------------- */ +/* ---- Threshold ----- */ +/* -------------------- */ +#define _BGP_DCR_DMA_CE_COUNT_THRESHOLD (_BGP_DCR_DMA+0x89) /* correctable ecc error count threshold, 
 reset to 0xFFFFFFFF */ +/* default used when system comes out of reset, will have to be tuned */ +#define _BGP_DCR_DMA_CE_COUNT_THRESHOLD_INIT 1 + +/* ---------------------------------- */ +/* ---- Correctable error count ----- */ +/* ---------------------------------- */ +/* c in the interval [0:8] count 0, count 1, ..., count 8 */ +#define _BGP_DCR_DMA_CE_COUNT(c) (_BGP_DCR_DMA+(0x8A+(c))) +#define _BGP_DCR_DMA_CE_COUNT_INJ_FIFO0 (_BGP_DCR_DMA+0x8A) +#define _BGP_DCR_DMA_CE_COUNT_INJ_FIFO1 (_BGP_DCR_DMA+0x8B) +#define _BGP_DCR_DMA_CE_COUNT_INJ_COUNTER (_BGP_DCR_DMA+0x8C) +#define _BGP_DCR_DMA_CE_COUNT_INJ_DESC (_BGP_DCR_DMA+0x8D) +#define _BGP_DCR_DMA_CE_COUNT_REC_FIFO0 (_BGP_DCR_DMA+0x8E) +#define _BGP_DCR_DMA_CE_COUNT_REC_FIFO1 (_BGP_DCR_DMA+0x8F) +#define _BGP_DCR_DMA_CE_COUNT_REC_COUNTER (_BGP_DCR_DMA+0x90) +#define _BGP_DCR_DMA_CE_COUNT_LOCAL_FIFO0 (_BGP_DCR_DMA+0x91) +#define _BGP_DCR_DMA_CE_COUNT_LOCAL_FIFO1 (_BGP_DCR_DMA+0x92) + +/* upon termination, create RAS event if any of the above counts are greater than this value */ +#define _BGP_DCR_DMA_CE_TERM_THRESH 0 + +/* ----------------- */ +/* ---- Status ----- */ +/* ----------------- */ +/* e in the interval [0:3] error0, error1, ..., error 3 */ +#define _BGP_DCR_DMA_FATAL_ERROR(e) (_BGP_DCR_DMA+(0x93+(e))) + +/* Below are error conditions most likely caused by software */ +#define _BGP_DCR_DMA_FATAL_ERROR0_WR0_MSB _BN(4) /* pque wr0 msb not 0 */ +#define _BGP_DCR_DMA_FATAL_ERROR0_RD0_MSB _BN(8) /* pque rd0 msb not 0 */ +#define _BGP_DCR_DMA_FATAL_ERROR0_WR1_MSB _BN(12) /* pque wr1 msb not 0 */ +#define _BGP_DCR_DMA_FATAL_ERROR0_RD1_MSB _BN(16) /* pque rd1 msb not 0 */ + +#define _BGP_DCR_DMA_FATAL_ERROR1_REC_MAP _BN(22) /* multiple bits set for the dcr rec fifo map */ + + +#define _BGP_DCR_DMA_FATAL_ERROR2_FIFO_SEL _BN(14) /* fifo_sel_n error */ +#define _BGP_DCR_DMA_FATAL_ERROR2_FIFO_SEL_FORM _BN(15) /* fifo_sel_n_form error */ +#define _BGP_DCR_DMA_FATAL_ERROR2_READ_RANGE _BN(25) /* read from 
address not in one of dcr address ranges */ + +#define _BGP_DCR_DMA_FATAL_ERROR3_DPUT_SIZE _BN(8) /* direct put packet had greater than 240 bytes */ +#define _BGP_DCR_DMA_FATAL_ERROR3_RGET_SIZE _BN(9) /* remote get packet had greater than 240 bytes */ +#define _BGP_DCR_DMA_FATAL_ERROR3_MAX_ADDRESS _BN(18) /* write to address larger than counter max */ +#define _BGP_DCR_DMA_FATAL_ERROR3_WRITE_RANGE _BN(26) /* write to address not in one of dcr address ranges */ + +#define _BGP_DCR_DMA_PQUE_WR0_BAD_ADDR (_BGP_DCR_DMA+0x97) +#define _BGP_DCR_DMA_PQUE_RD0_BAD_ADDR (_BGP_DCR_DMA+0x98) +#define _BGP_DCR_DMA_PQUE_WR1_BAD_ADDR (_BGP_DCR_DMA+0x99) +#define _BGP_DCR_DMA_PQUE_RD1_BAD_ADDR (_BGP_DCR_DMA+0x9a) + +#define _BGP_DCR_DMA_MFU_STAT0 (_BGP_DCR_DMA+0x9b) +#define _DMA_MFU_STAT0_IMFU_NOT_ENABLED_COUNTER_ID(x) _G8((x), 7) /* idma not enabled counter id */ +#define _DMA_MFU_STAT0_IMFU_UNDERFLOW_COUNTER_ID(x) _G8((x), 15) /* idma underflow counter id */ +#define _DMA_MFU_STAT0_IMFU_OVERFLOW_NB_ADDR(x) _G16((x), 31) /* idma netbus addr that caused counter overflow */ +#define _BGP_DCR_DMA_MFU_STAT1 (_BGP_DCR_DMA+0x9c) +#define _DMA_MFU_STAT1_IMFU_CUR_FIFO_ID(x) _G7((x), 7) /* current fifo id that idma is working on */ +#define _DMA_MFU_STAT1_RMFU_UNDERFLOW_COUNTER_ID(x) _G8((x), 15) /* rdma underflow counter id */ +#define _DMA_MFU_STAT1_RMFU_OVERFLOW_NB_ADDR(x) _G16((x), 31) /* rdma netbus addr that caused counter overflow */ +#define _BGP_DCR_DMA_MFU_STAT2 (_BGP_DCR_DMA+0x9d) +#define _DMA_MFU_STAT2_RMFU_FIFO_NE_OR_NA(x) _GN((x), 0) /* rdma fifo not enabled or not all_available */ +#define _DMA_MFU_STAT2_RMFU_HDR_FIFO_NE_OR_NA(x) _GN((x), 1) /* rdma header fifo not enabled or not all_available */ +#define _DMA_MFU_STAT2_RMFU_INJ_FIFO_NE_OR_NA(x) _GN((x), 2) /* rdma injection fifo for remote get not enabled or not all_available */ +#define _DMA_MFU_STAT2_RMFU_COUNTER_NE(x) _GN((x), 3) /* rdma accessing not enabled counter */ +#define _DMA_MFU_STAT2_RMFU_PKT_PID(x) _G2((x), 
7) /* rdma receiving packet pid */ +#define _DMA_MFU_STAT2_RMFU_FIFO_BIT(x) _G8((x), 15) /* rdma receiving packet fifo bit, only one bit should be set */ + /* bit orders are xp, xm, yp, ym, zp, zm, hp, local */ +#define _DMA_MFU_STAT2_RMFU_RGET_FIFO_ID(x) _G8((x), 23) /* rdma remote get (injection) fifo id */ +#define _DMA_MFU_STAT2_RMFU_COUNTER_ID(x) _G8((x), 31) /* rdma direct put counter id */ +#define _BGP_DCR_DMA_L3_RD_ERROR_ADDR (_BGP_DCR_DMA+0x9e) +#define _BGP_DCR_DMA_L3_WR_ERROR_ADDR (_BGP_DCR_DMA+0x9f) + +/* i in the interval [0:3] */ +#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_RD_DATA(i) (_BGP_DCR_DMA+(0xa0+(i))) +#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_RD_ECC (_BGP_DCR_DMA+0xa4) +#define _DMA_LF_IMFU_DESC_BD_RD_ECC_DWORD0(x) _G8((x),23) /* ecc for data bits 0 to 63 */ +#define _DMA_LF_IMFU_DESC_BD_RD_ECC_DWORD1(x) _G8((x),31) /* ecc for data bits 64 to 127 */ +#define _BGP_DCR_DMA_ARRAY_RD_ECC (_BGP_DCR_DMA+0xa5) +#define _DMA_ARRAY_RD_ECC_WORD0(x) _G7((x), 7) /* word address offset 0 */ +#define _DMA_ARRAY_RD_ECC_WORD1(x) _G7((x), 15) /* word address offset 1 */ +#define _DMA_ARRAY_RD_ECC_WORD2(x) _G7((x), 23) /* word address offset 2 */ +#define _DMA_ARRAY_RD_ECC_WORD3(x) _G7((x), 31) /* word address offset 3 */ +#define _BGP_DCR_DMA_TS_LINK_CHK_STAT (_BGP_DCR_DMA+0xa6) +#define _DMA_TS_LINK_CHK_STAT_PKT_CAPTURED(x) _GN((x), 0) /* bad packet captured flag */ +#define _DMA_TS_LINK_CHK_STAT_RECV_PIPE_FERR(x) _GN((x), 1) /* receive pipe fatal error */ +#define _DMA_TS_LINK_CHK_STAT_STATE(x) _G4((x), 7) /* state machine state */ +#define _DMA_TS_LINK_CHK_STAT_SRAM_ADDR(x) _G10((x), 23) /* current sram read or write address */ +#define _DMA_TS_LINK_CHK_STAT_SRAM_RD_DATA(x) _G8((x), 31) /* sram read data */ + +/* ---- Debug ----- */ +/* i in the interval [0:3] */ +#define _BGP_DCR_DMA_iFIFO_DESC_RD_FLAG(i) (_BGP_DCR_DMA+(0xa7+(i))) +/* j in the interval [0:1] */ +#define _BGP_DCR_DMA_INTERNAL_STATE(j) (_BGP_DCR_DMA+(0xab+(j))) +#define 
_DMA_INTERNAL_STATE0_IMFU_SEL_STATE(x) _G3((x), 2) +#define _DMA_INTERNAL_STATE0_IMFU_ARB_STATE(x) _G5((x), 7) +#define _DMA_INTERNAL_STATE0_IMFU_FIFO_ARB_STATE(x) _G5((x), 12) +#define _DMA_INTERNAL_STATE0_IMFU_CNT_ARB_STATE(x) _G4((x), 16) +#define _DMA_INTERNAL_STATE0_RMFU_ARB_STATE(x) _G5((x), 23) +#define _DMA_INTERNAL_STATE0_RMFU_FIFO_ARB_STATE(x) _G4((x), 27) +#define _DMA_INTERNAL_STATE0_RMFU_CNT_ARB_STATE(x) _G4((x), 31) + +#define _DMA_INTERNAL_STATE1_PQUE_ARB_STATE(x) _G3((x), 2) +#define _DMA_INTERNAL_STATE1_ICONU_SM_STATE(x) _G4((x), 6) +#define _DMA_INTERNAL_STATE1_IFSU_SM_STATE(x) _G3((x), 9) +#define _DMA_INTERNAL_STATE1_IDAU_L3RSM_STATE(x) _G3((x), 12) +#define _DMA_INTERNAL_STATE1_IDAU_L3VSM_STATE(x) _G3((x), 15) +#define _DMA_INTERNAL_STATE1_IDAU_TTSM_STATE(x) _G3((x), 18) +#define _DMA_INTERNAL_STATE1_RCONU_SM_STATE(x) _G4((x), 22) +#define _DMA_INTERNAL_STATE1_RFSU_SM_STATE(x) _G3((x), 25) +#define _DMA_INTERNAL_STATE1_RDAU_QRSM_STATE(x) _G3((x), 28) +#define _DMA_INTERNAL_STATE1_RDAU_L3SM_STATE(x) _G3((x), 31) + +/* values for _BGP_DCR_DMA_INTERNAL_STATE when all state machines are in idle, or wait state */ +#define _BGP_DCR_DMA_INTERNAL_STATE_0_IDLE (0x21088111) + +/* values for _BGP_DCR_DMA_INTERNAL_STATE when all state machines are in idle, or wait state */ +#define _BGP_DCR_DMA_INTERNAL_STATE_0_IDLE (0x21088111) +#define _BGP_DCR_DMA_INTERNAL_STATE_1_IDLE (0x22492249) + +#define _BGP_DCR_DMA_PQUE_POINTER (_BGP_DCR_DMA+0xad) +#define _DMA_PQUE_POINTER_WR0_BEGIN(x) _G4((x),3) +#define _DMA_PQUE_POINTER_WR0_END(x) _G4((x),7) +#define _DMA_PQUE_POINTER_RD0_BEGIN(x) _G4((x),11) +#define _DMA_PQUE_POINTER_RD0_END(x) _G4((x),15) +#define _DMA_PQUE_POINTER_WR1_BEGIN(x) _G4((x),19) +#define _DMA_PQUE_POINTER_WR1_END(x) _G4((x),23) +#define _DMA_PQUE_POINTER_RD1_BEGIN(x) _G4((x),27) +#define _DMA_PQUE_POINTER_RD1_END(x) _G4((x),31) +#define _BGP_DCR_DMA_LOCAL_FIFO_POINTER (_BGP_DCR_DMA+0xae) +#define _DMA_LOCAL_FIFO_POINTER_BEGIN(x) _G8((x),7) 
+#define _DMA_LOCAL_FIFO_POINTER_END(x) _G8((x),15) +#define _DMA_LOCAL_FIFO_POINTER_END_OF_PKT(x) _G8((x),23) +#define _BGP_DCR_DMA_WARN_ERROR (_BGP_DCR_DMA+0xaf) + +/* offsets 0xb0 are reserved */ + +/* ---- Clears ----- */ +#define _BGP_DCR_DMA_CLEAR0 (_BGP_DCR_DMA+0xb1) +#define _DMA_CLEAR0_IMFU_ARB_WERR _BN(0) +#define _DMA_CLEAR0_IMFU_COUNTER_UNDERFLOW _BN(1) +#define _DMA_CLEAR0_IMFU_COUNTER_OVERFLOW _BN(2) +#define _DMA_CLEAR0_RMFU_COUNTER_UNDERFLOW _BN(3) +#define _DMA_CLEAR0_RMFU_COUNTER_OVERFLOW _BN(4) +#define _DMA_CLEAR0_RMFU_ARB_WERR _BN(5) +#define _DMA_CLEAR0_PQUE_WR0_BEN_WERR _BN(6) +#define _DMA_CLEAR0_PQUE_WR0_ADDR_CHK_WERR _BN(7) +#define _DMA_CLEAR0_PQUE_RD0_ADDR_CHK_WERR _BN(8) +#define _DMA_CLEAR0_PQUE_WR1_BEN_WERR _BN(9) +#define _DMA_CLEAR0_PQUE_WR1_ADDR_CHK_WERR _BN(10) +#define _DMA_CLEAR0_PQUE_RD1_ADDR_CHK_WERR _BN(11) +#define _DMA_CLEAR0_PQUE_WR0_HOLD_BAD_ADDR _BN(12) +#define _DMA_CLEAR0_PQUE_RD0_HOLD_BAD_ADDR _BN(13) +#define _DMA_CLEAR0_PQUE_WR1_HOLD_BAD_ADDR _BN(14) +#define _DMA_CLEAR0_PQUE_RD1_HOLD_BAD_ADDR _BN(15) +#define _DMA_CLEAR0_IFIFO_ARRAY_UE0 _BN(16) +#define _DMA_CLEAR0_IFIFO_ARRAY_UE1 _BN(17) +#define _DMA_CLEAR0_ICOUNTER_ARRAY_UE _BN(18) +#define _DMA_CLEAR0_IMFU_DESC_UE _BN(19) +#define _DMA_CLEAR0_RFIFO_ARRAY_UE0 _BN(20) +#define _DMA_CLEAR0_RFIFO_ARRAY_UE1 _BN(21) +#define _DMA_CLEAR0_RCOUNTER_ARRAY_UE _BN(22) +#define _DMA_CLEAR0_LOCAL_FIFO_UE0 _BN(23) +#define _DMA_CLEAR0_LOCAL_FIFO_UE1 _BN(24) + +#define _BGP_DCR_DMA_CLEAR1 (_BGP_DCR_DMA+0xb2) +#define _DMA_CLEAR1_TS_LINK_CHK _BN(0) + + +#endif diff --git a/drivers/net/bgp_collective/ppc450.h b/drivers/net/bgp_collective/ppc450.h new file mode 100644 index 00000000000000..0f312cb39671e3 --- /dev/null +++ b/drivers/net/bgp_collective/ppc450.h @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2007, 2008 International Business Machines + * Volkmar Uhlig <vuhlig@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of 
the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#ifndef __DRIVERS__BLUEGENE__PPC450_H__ +#define __DRIVERS__BLUEGENE__PPC450_H__ + +/* include asm instruction macros */ +/* #include <asm/ppc450.h> */ + +/********************************************************************** + * DCR access wrapper + **********************************************************************/ + +extern inline uint32_t mfdcrx(uint32_t dcrn) +{ + uint32_t value; + asm volatile ("mfdcrx %0,%1": "=r" (value) : "r" (dcrn) : "memory"); + return value; +} + +extern inline void mtdcrx(uint32_t dcrn, uint32_t value) +{ + asm volatile("mtdcrx %0,%1": :"r" (dcrn), "r" (value) : "memory"); +} + +/* volatile 32bit read */ +extern inline uint32_t in_be32_nosync(uint32_t *vaddr) +{ + volatile uint32_t *va = (volatile uint32_t *) vaddr; + /* _bgp_mbar(); */ + return *va; +} + + +/********************************************************************** + * Helper functions to access IO via double hummer + **********************************************************************/ + +extern inline void fpu_memcpy_16(void *dst, void *src) +{ + asm volatile("lfpdx 0,0,%0\n" + "stfpdx 0,0,%1\n" + : + : "b"(src), "b"(dst) + : "fr0", "memory"); +} + +extern inline void out_be128(void *port, void *ptrval) +{ + u32 tmp[4] __attribute__((aligned(16))); + + if ((u32)ptrval & 0xf) { + memcpy(tmp, ptrval, 16); + ptrval = tmp; + } + + fpu_memcpy_16(port, ptrval); +} + +extern inline void outs_be128(void *port, void *src, unsigned num) +{ + u32 tmp[4] __attribute__((aligned(16))); + + /* port must be 16 byte aligned */ + BUG_ON((u32)port & 0xf); + + if (unlikely((u32)src & 0xf)) { + /* unaligned destination */ + while(num--) { + memcpy(tmp, src, 16); + fpu_memcpy_16(port, tmp); + src += 16; + } + } else { + while(num--) { + fpu_memcpy_16(port, src); + src += 16; + } + } +} + +extern inline void 
outs_zero128(void *port, unsigned num) +{ + static u32 zero[4] __attribute__((aligned(16))) = {0, }; + BUG_ON((u32)port & 0xf); + + while (num--) + out_be128(port, zero); +} + +/* + * in string operation similar to x86: reads block of data from port + * into memory + */ +extern inline void ins_be128(void *dest, void *port, unsigned num) +{ + u32 tmp[4] __attribute__((aligned(16))); + + /* port must be 16 byte aligned */ + BUG_ON((u32)port & 0xf); + + if ((u32)dest & 0xf) + { + /* unaligned destination */ + while(num--) { + fpu_memcpy_16(tmp, port); + memcpy(dest, tmp, 16); + dest += 16; + } + } + else + { + while(num--) { + fpu_memcpy_16(dest, port); + dest += 16; + } + } +} + +extern inline void in_be128(void *dest, void *port) +{ + char tmp[16] __attribute__((aligned(16))); + void *ptr = dest; + + if ((u32)dest & 0xf) + ptr = tmp; + + fpu_memcpy_16(ptr, port); + + if ((u32)dest & 0xf) + memcpy(dest, tmp, 16); +} + +#endif /* !__DRIVERS__BLUEGENE__PPC450_H__ */ diff --git a/drivers/net/bgp_e10000/Makefile b/drivers/net/bgp_e10000/Makefile new file mode 100644 index 00000000000000..c33c97ea491e2a --- /dev/null +++ b/drivers/net/bgp_e10000/Makefile @@ -0,0 +1,5 @@ +# Makefile for BlueGene/P 10 GbE driver + +obj-$(CONFIG_BGP_E10000) += bgp_e10000.o + +bgp_e10000-objs := bgp_tomal.o bgp_emac.o bgp_e10000_main.o diff --git a/drivers/net/bgp_e10000/bgp_e10000.h b/drivers/net/bgp_e10000/bgp_e10000.h new file mode 100644 index 00000000000000..204217e3de0305 --- /dev/null +++ b/drivers/net/bgp_e10000/bgp_e10000.h @@ -0,0 +1,175 @@ +/* + * bgp_e10000.h: common header file for BlueGene/P 10 GbE driver + * + * Copyright (c) 2007, 2010 International Business Machines + * Author: Andrew Tauferner <ataufer@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#include <linux/proc_fs.h> +#include <asm/io.h> + +#ifndef _BGP_E10000_H +#define _BGP_E10000_H + +#define DBG_LEVEL1 1 +#define DBG_LEVEL2 (DBG_LEVEL1 | 2) +#define DBG_LEVEL3 (DBG_LEVEL2 | 4) +#define DBG_E10000 8 +#define DBG_EMAC 16 +#define DBG_TOMAL 32 +#define DBG_XSGS 64 +#define DBG_DEVBUS 128 +#define DBG_NAPI 256 +#define DBG_SCATTERGATHER 512 + +#define BGP_E10000_MIN_MTU 256 +#define BGP_E10000_MAX_MTU 9000 +#define BGP_E10000_FCS_SIZE 4 + + +#ifdef CONFIG_BGP_E10000_DBG +#include <asm/udbg.h> +#define PRINTK(detail, format, args...) if (((detail) & CONFIG_BGP_E10000_DBG_LEVEL) == (detail)) udbg_printf("%s: " format, __FUNCTION__, ##args) +#else +#define PRINTK(detail, format, args...) +#endif + +typedef unsigned char U8; +typedef unsigned short U16; +typedef unsigned int U32; +typedef unsigned long long U64; + + +typedef enum { + e10000_ras_none = 0x00, + e10000_ras_hw_not_found = 0x01, + e10000_ras_netdev_alloc_failure = 0x02, + e10000_ras_netdev_reg_failure = 0x03, + e10000_ras_mtu_invalid = 0x04, + e10000_ras_tx_timeout = 0x05, + e10000_ras_internal_error = 0x07, + e10000_ras_hw_failure = 0x09, + e10000_ras_link_error = 0x0a, + e10000_ras_phy_reset_error = 0x0b, + e10000_ras_emac_config_error = 0x0c, + e10000_ras_link_loss = 0x0d, + + e10000_ras_max = 0xff +} e10000_ras_id; + + +typedef struct _E10000_PROC_ENTRY { + char* name; + void* addr; + struct proc_dir_entry* entry; +} E10000_PROC_ENTRY; + + + +/* Generates a RAS event for ethernet. 
*/ +void e10000_printr(U16 subComponent, + U16 id, + char* format, + ...); + + +static inline U32 mfdcrx(U32 dcrNum) +{ + U32 dcrVal = 0; + + asm volatile("mfdcrx %0,%1": "=r" (dcrVal) : "r" (dcrNum) : "memory"); + + return dcrVal; +} + + +static inline void mtdcrx(U32 dcrNum, + U32 dcrVal) +{ + asm volatile ("mtdcrx %0,%1": :"r" (dcrNum), "r" (dcrVal) : "memory"); + isync(); + + return; +} + + +static inline void msync(void) +{ + do { asm volatile ("msync" : : : "memory"); } while(0); + + return; +} + + +static inline int e10000_proc_read(char* page, + char** start, + off_t off, + int count, + int* eof, + void* data) +{ + int rc = 0; + int value; + + /* Read the value of the associated address and print it. */ + value = in_be32(data); + rc = snprintf(page, count, "%08x\n", value); + + *eof = 1; + + return rc; +} + + +static inline int e10000_proc_write(struct file* file, + const char* buffer, + unsigned long len, + void* data) +{ + unsigned int value; + char valStr[128]; + int strLen = sizeof(valStr)-1; + + if (strLen > len) + strLen = len; + if (copy_from_user(valStr, buffer, strLen)) + return -EFAULT; + else if (len) { + char* endp; + + /* NULL terminate the string of digits and convert to its numeric value. */ + if (valStr[strLen-1] == '\n') + strLen--; + valStr[strLen] = '\0'; + value = simple_strtoul(valStr, &endp, 0); + + /* Write the value to the associated address. 
*/ + out_be32(data, value); + } + + return len; +} + + +static inline struct proc_dir_entry* e10000_create_proc_entry(struct proc_dir_entry* dir, + char* name, + void* addr) +{ + struct proc_dir_entry* entry = create_proc_entry(name, S_IRUGO, dir); + if (entry) { + entry->nlink = 1; + entry->read_proc = e10000_proc_read; + entry->write_proc = e10000_proc_write; + entry->data = addr; + } + + return entry; +} + +#endif diff --git a/drivers/net/bgp_e10000/bgp_e10000_main.c b/drivers/net/bgp_e10000/bgp_e10000_main.c new file mode 100644 index 00000000000000..8bb8ff5d9f5e00 --- /dev/null +++ b/drivers/net/bgp_e10000/bgp_e10000_main.c @@ -0,0 +1,567 @@ +/* + * bgp_e10000_main.c: net_device source for BlueGene/P 10 GbE driver + * + * Copyright (c) 2007, 2010 International Business Machines + * Author: Andrew Tauferner <ataufer@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/if_ether.h> +#include <asm/reg_booke.h> +#include <linux/proc_fs.h> +#include <stdarg.h> +#include <asm/bluegene_ras.h> +#include <asm/bgp_personality.h> +#include <asm/bluegene.h> + +#include "bgp_e10000.h" +#include "bgp_emac.h" +#include "bgp_tomal.h" + + +static int e10000_change_mtu(struct net_device*, int); +static int e10000_do_ioctl(struct net_device*, struct ifreq*, int); +static struct net_device_stats* e10000_get_stats(struct net_device*); +static int e10000_hard_start_xmit(struct sk_buff*, struct net_device*); +static int e10000_open(struct net_device*); +//static void e10000_set_multicast_list(struct net_device*); +static int e10000_stop(struct net_device*); +static void e10000_tx_timeout(struct net_device*); +static int e10000_set_mac_address(struct net_device* netDev, void* macAddr); +static void e10000_link_test(unsigned long); + +static struct net_device* e10000NetDev; +static struct timer_list e10000LinkTimer; +static const struct net_device_ops e10000NetDevOps = { + .ndo_open = e10000_open, + .ndo_stop = e10000_stop, + .ndo_start_xmit = e10000_hard_start_xmit, + .ndo_get_stats = e10000_get_stats, + .ndo_set_mac_address = e10000_set_mac_address, + .ndo_tx_timeout = e10000_tx_timeout, + .ndo_change_mtu = e10000_change_mtu, + .ndo_do_ioctl = e10000_do_ioctl, +}; + +static BGP_Personality_t bgpers; +static void* e10000DevMapAddr; +static unsigned int e10000DevMapLen; + +static int __init + e10000_init(void) +{ + int rc = 0; + TOMAL* tomal = NULL; + EMAC* emac = NULL; + struct proc_dir_entry* e10000Dir; + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry\n"); + + /* Determine if Ethernet HW is present. 
*/ + bluegene_getPersonality((void*) &bgpers, sizeof(bgpers)); + if (bgpers.Network_Config.RankInPSet) { /* No HW so exit. */ + rc = -ENODEV; + goto end; + } + + /* Allocate ethernet device(s). */ + e10000NetDev = alloc_etherdev(sizeof(EMAC)); + if (!e10000NetDev) { + e10000_printr(bg_subcomp_linux, e10000_ras_netdev_alloc_failure, + "Failure allocating ethernet device."); + rc = -ENOMEM; + goto end; + } + + /* Create /proc directory. */ + e10000Dir = proc_mkdir("driver/e10000", NULL); + + /* Create mapping for TOMAL and XEMAC devices. Since they are close in memory one mapping with */ + /* a small hole in between will cover both. Tell CNS where XEMAC is mapped. */ + e10000DevMapLen = XEMAC_BASE_ADDRESS + sizeof(XEMACRegs) - TOMAL_BASE_ADDRESS; + e10000DevMapAddr = ioremap(TOMAL_BASE_ADDRESS, e10000DevMapLen); + if (!e10000DevMapAddr) { + rc = -ENODEV; + goto end; + } + rc = bluegene_mapXEMAC(e10000DevMapAddr+(XEMAC_BASE_ADDRESS - TOMAL_BASE_ADDRESS)); + if (rc) { + e10000_printr(bg_subcomp_linux, 0xff, "Failure registering XEMAC mapping with CNS."); + rc = -ENODEV; + goto unmap_dev; + } + + /* Allocate and intialize TOMAL device. */ + tomal = tomal_init(e10000DevMapAddr, e10000NetDev, CONFIG_BGP_E10000_RXB, CONFIG_BGP_E10000_TXB, NULL, + 0, 0, TOMAL_IRQ0, TOMAL_IRQ1, e10000Dir); + if (IS_ERR(tomal)) { + rc = (int) tomal; + goto unmap_dev; + } + + /* Initialize XEMAC. */ + e10000NetDev->irq = XEMAC_IRQ; + emac = (EMAC*) netdev_priv(e10000NetDev); + rc = emac_init((char*) e10000DevMapAddr + (XEMAC_BASE_ADDRESS - TOMAL_BASE_ADDRESS), emac, EMAC_TYPE_XEMAC, + tomal, 0, e10000NetDev, e10000Dir); + if (rc) + goto free_tomal; + + /* Initialize network device operations. */ + e10000NetDev->netdev_ops = &e10000NetDevOps; + + /* Register the net_device. */ + rc = register_netdev(e10000NetDev); + if (rc) { + e10000_printr(bg_subcomp_linux, e10000_ras_netdev_reg_failure, + "Failure registering net_device [%p].", e10000NetDev); + goto exit_emac; + } + + /* Configure EMAC. 
*/ + rc = emac_configure(emac); + if (rc) { + e10000_printr(bg_subcomp_e10000, e10000_ras_emac_config_error, + "EMAC configuration error. rc=%d", rc); + goto exit_emac; + } + + /* Initialize the timer. */ + e10000LinkTimer.function = e10000_link_test; + e10000LinkTimer.data = (unsigned int) e10000NetDev; + init_timer(&e10000LinkTimer); + + goto end; + +exit_emac: + emac_exit(emac); +free_tomal: + tomal_exit(tomal); +unmap_dev: + iounmap(e10000DevMapAddr); + free_netdev(e10000NetDev); +end: + + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit rc=0x%x\n", rc); + + return rc; +} + + + +static int e10000_set_mac_address(struct net_device* netDev, void* macAddr) +{ + int rc = -EINVAL; + struct sockaddr* sockAddr = (struct sockaddr*) macAddr; + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, macAddr=%p\n", + netDev, macAddr); + + if (is_valid_ether_addr(((struct sockaddr*) macAddr)->sa_data)) { + EMAC* emac = (EMAC*) netdev_priv(netDev); + unsigned long flags; + + memcpy(netDev->dev_addr, sockAddr->sa_data, netDev->addr_len); + + spin_lock_irqsave(&emac->lock, flags); + rc = emac_set_mac_address(emac); + spin_unlock_irqrestore(&emac->lock, flags); + } else + rc = -EADDRNOTAVAIL; + + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + + +static int e10000_change_mtu(struct net_device* netDev, + int newMTU) +{ + int rc = 0; + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, newMTU=%d\n", + netDev, newMTU); + + if (newMTU < BGP_E10000_MIN_MTU || newMTU > BGP_E10000_MAX_MTU) { + e10000_printr(bg_subcomp_e10000, e10000_ras_mtu_invalid, + "Invalid MTU of [%d] specified. 
Valid MTU " + "values are [%d,%d].\n", newMTU, BGP_E10000_MIN_MTU, + BGP_E10000_MAX_MTU); + rc = -EINVAL; + } else if (netDev->mtu != newMTU && netif_running(netDev)) { +/* #ifdef CONFIG_BGP_E10000_NAPI */ +/* netDev->weight = tomal->maxRxBuffers[channel]; */ +/* #endif */ + netDev->mtu = newMTU; + } + + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +static int e10000_do_ioctl(struct net_device* netDev, + struct ifreq* req, + int cmd) +{ + int rc = 0; + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, req=%p, cmd=0x%x\n", + netDev, req, cmd); + +// printk(KERN_CRIT "IOCTL not supported yet\n"); + + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +static struct net_device_stats* e10000_get_stats(struct net_device* netDev) +{ + struct net_device_stats* stats = &((EMAC*) netdev_priv(netDev))->stats; + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\nexit - stats=%p\n", + netDev, stats); + + return stats; +} +#ifdef CONFIG_BGP_E10000_DBG +int e10000_diag_count ; +/* If the 'skb' has fragments ( is a scatter-gather one), display them all and the base element too */ +static void diag_display_sk(struct sk_buff* skb) +{ + int nr_frags = skb_shinfo(skb)->nr_frags; + if( skb->data_len >= 4096 || + e10000_diag_count > 0) + { + int f ; + if( e10000_diag_count > 0 ) e10000_diag_count -= 1 ; + printk(KERN_INFO "diag_display_sk skb=%p nr_frags=%d skb->data=%p skb->len=0x%08x skb->data_len=0x%08x e10000_diag_count=%d\n", + skb,nr_frags,skb->data,skb->len,skb->data_len,e10000_diag_count) ; + for(f=0;f<nr_frags;f += 1) + { + struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f]; + printk(KERN_INFO " frags[%d]->(page=%p, page_offset=0x%08x, size=0x%08x)\n", + f,frag->page,frag->page_offset,frag->size) ; + } + } +} +#endif +static int e10000_hard_start_xmit(struct sk_buff* skb, + struct net_device* netDev) +{ + int rc; + unsigned long flags; + EMAC* emac = netdev_priv(netDev); + + PRINTK(DBG_E10000 | DBG_LEVEL2, 
"entry - skb=%p, netDev=%p\n", + skb, netDev); + +#ifdef CONFIG_BGP_E10000_DBG + if(DBG_SCATTERGATHER & CONFIG_BGP_E10000_DBG_LEVEL ) diag_display_sk(sk) ; +#endif + + spin_lock_irqsave(&emac->tomal->txLock[emac->channel], flags); + rc = tomal_xmit_tx_buffer(emac->tomal, emac->channel, skb); + if (likely(!rc)) { + emac->stats.tx_packets++; + emac->stats.tx_bytes += skb->len; + rc = NETDEV_TX_OK; + netDev->trans_start = jiffies; + } else { + netif_stop_queue(netDev); + rc = NETDEV_TX_BUSY; + } + spin_unlock_irqrestore(&emac->tomal->txLock[emac->channel], flags); + + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + + +static int e10000_open(struct net_device* netDev) +{ + int rc = 0; + EMAC* emac = (EMAC*) netdev_priv(netDev); + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev); + + if (!emac->opened) { + U32 linkTimer; + U8 rxLink, txLink; + struct sockaddr sockAddr; + + /* Set the MAC address for this interface. */ + memcpy(sockAddr.sa_data, bgpers.Ethernet_Config.EmacID, netDev->addr_len); + e10000_set_mac_address(netDev, &sockAddr); + + /* Acquire locks for EMAC and TOMAL. */ + spin_lock(&emac->tomal->rxLock[emac->channel]); + spin_lock(&emac->tomal->txLock[emac->channel]); + spin_lock(&emac->lock); + + emac->opened = 1; + +#ifndef CONFIG_BGP_E10000_EMAC_LOOPBACK + /* Reset TOMAL */ + tomal_soft_reset(emac->tomal); + + /* PHY reset. */ + rc = bluegene_macResetPHY(); + if (rc) { + e10000_printr(bg_subcomp_e10000, e10000_ras_phy_reset_error, + "%s: PHY reset error.", netDev->name); + spin_unlock(&emac->lock); + spin_unlock(&emac->tomal->txLock[emac->channel]); + spin_unlock(&emac->tomal->rxLock[emac->channel]); + goto exit; + } + + /* Wait for link to be ready. We wait less time for a single ION so that */ + /* we timeout before the control system does. 
*/ + linkTimer = 240; + for (txLink = 0, rxLink = 0; linkTimer && (!txLink || !rxLink); linkTimer--) { + txLink = bluegene_macTestTxLink(); + rxLink = bluegene_macTestRxLink(); + udelay(100000); + } + printk(KERN_NOTICE "%s: Link status [RX%c,TX%c]\n", netDev->name, + rxLink ? '+' : '-', txLink ? '+' : '-'); + if (!linkTimer) { + e10000_printr(bg_subcomp_e10000, e10000_ras_link_error, + "%s: No link detected.", netDev->name); + spin_unlock(&emac->lock); + spin_unlock(&emac->tomal->txLock[emac->channel]); + spin_unlock(&emac->tomal->rxLock[emac->channel]); + goto exit; + } +#endif + + /* Configure EMAC. */ + rc = emac_configure(emac); + if (rc) { + e10000_printr(bg_subcomp_e10000, e10000_ras_emac_config_error, + "EMAC configuration error. rc=%d", rc); + spin_unlock(&emac->lock); + spin_unlock(&emac->tomal->txLock[emac->channel]); + spin_unlock(&emac->tomal->rxLock[emac->channel]); + goto exit; + } + + /* Enable TX and RX for TOMAL and EMAC. */ + tomal_rx_tx_enable(emac->tomal); + emac_rx_enable(emac); + emac_tx_enable(emac); + + /* Enable IRQs. */ + tomal_irq_enable(emac->tomal, emac->channel); + emac_irq_enable(emac); + + /* Release the locks. */ + spin_unlock(&emac->lock); + spin_unlock(&emac->tomal->txLock[emac->channel]); + spin_unlock(&emac->tomal->rxLock[emac->channel]); + + /* Start the queues. */ + netif_start_queue(netDev); + + /* Start link timer. */ + mod_timer(&e10000LinkTimer, jiffies + HZ); + } +exit: + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + + +static void e10000_link_test(unsigned long data) +{ + struct net_device* netDev = (struct net_device*) data; + static unsigned int linkLossCount = 0; + u8 txLink = bluegene_macTestTxLink(); + u8 rxLink = bluegene_macTestRxLink(); + + if (!txLink || !rxLink) { + /* Link gone. Have we reached the threshold where we are going to send a fatal event? */ + if (linkLossCount == 30) + e10000_printr(bg_subcomp_e10000, e10000_ras_link_error, + "%s: Link error detected. 
Link status [RX%c,TX%c]\n", netDev->name, + rxLink ? '+' : '-', txLink ? '+' : '-'); + else if (linkLossCount == 0) + /* Send non-fatal RAS when the link first disappears. */ + e10000_printr(bg_subcomp_e10000, e10000_ras_link_loss, + "%s: Loss of link detected. Link status [RX%c,TX%c]\n", netDev->name, + rxLink ? '+' : '-', txLink ? '+' : '-'); + + linkLossCount++; + } else + /* Link present. Reset counter. */ + linkLossCount = 0; + + mod_timer(&e10000LinkTimer, jiffies + HZ); + + return; +} + + +//static void e10000_set_multicast_list(struct net_device* netDev) +//{ +// PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev); +// +// emac_set_multicast_list((EMAC*) netdev_priv(netDev)); +// +// PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n"); +// +// return; +//} + + +static int e10000_stop(struct net_device* netDev) +{ + int rc = 0; + EMAC* emac = (EMAC*) netdev_priv(netDev); + unsigned long tomalRxFlags; + unsigned long tomalTxFlags; + unsigned long emacFlags; + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev); + + /* Acquire locks for EMAC and TOMAL. */ + spin_lock_irqsave(&emac->tomal->rxLock[emac->channel], tomalRxFlags); + spin_lock_irqsave(&emac->tomal->txLock[emac->channel], tomalTxFlags); + spin_lock_irqsave(&emac->lock, emacFlags); + + local_bh_disable(); + del_timer_sync(&e10000LinkTimer); + netif_stop_queue(netDev); + + emac->opened = 0; + emac_rx_disable(emac); + emac_tx_disable(emac); + emac_irq_disable(emac); + tomal_rx_tx_disable(emac->tomal); + tomal_irq_disable(emac->tomal, emac->channel); + + /* Release locks for EMAC and TOMAL. 
*/ + spin_unlock_irqrestore(&emac->lock, emacFlags); + spin_unlock_irqrestore(&emac->tomal->txLock[emac->channel], tomalTxFlags); + spin_unlock_irqrestore(&emac->tomal->rxLock[emac->channel], tomalRxFlags); + + local_bh_enable(); + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +static void e10000_tx_timeout(struct net_device* netDev) +{ + EMAC* emac = (EMAC*) netdev_priv(netDev); + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev); + + e10000_printr(bg_subcomp_e10000, e10000_ras_tx_timeout, + "Transmission timeout at %u, elapsed time %u\n", + (U32) jiffies, (U32)(jiffies - netDev->trans_start)); + emac->stats.tx_errors++; + + /* Attempt to reset the interface. */ + e10000_stop(netDev); + e10000_open(netDev); + + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n"); + + return; +} + + +static void e10000_exit(void) +{ + EMAC* emac = netdev_priv(e10000NetDev); + + PRINTK(DBG_E10000 | DBG_LEVEL2, "entry\n"); + + /* Allow the HW to clean up. */ + if (emac) { + if (emac->tomal) + tomal_exit(emac->tomal); + emac_exit(emac); + } + + /* Unmap HW. */ + if (e10000DevMapAddr) + iounmap(e10000DevMapAddr); + + /* Unregister and free the net_device. */ + if (e10000NetDev) { + unregister_netdev(e10000NetDev); + free_netdev(e10000NetDev); + } + + PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n"); + + return; +} + + +extern int bgWriteRasStr(unsigned int component, + unsigned int subcomponent, + unsigned int errCode, + char* str, + unsigned int strLen); + +void e10000_printr(U16 subComponent, + U16 id, + char* format, + ...) 
+{ + va_list args; + int n; + char text[BG_RAS_DATA_MAX]; + + va_start(args, format); + n = vsnprintf(text, sizeof(text)-1, format, args); + va_end(args); + if (n < 0) + n = 0; + + text[n] = '\0'; + printk(KERN_WARNING "%s\n", text); + bgWriteRasStr(bg_comp_kernel, subComponent, id, text, 0); + + return; +} + + +module_init(e10000_init); +module_exit(e10000_exit); + + + +MODULE_DESCRIPTION("10Gb Ethernet Driver for BlueGene"); +MODULE_VERSION("2.0"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Andrew Tauferner"); diff --git a/drivers/net/bgp_e10000/bgp_emac.c b/drivers/net/bgp_e10000/bgp_emac.c new file mode 100644 index 00000000000000..1afa02e1e01536 --- /dev/null +++ b/drivers/net/bgp_e10000/bgp_emac.c @@ -0,0 +1,282 @@ +/* + * bgp_emac.c: XEMAC device for BlueGene/P 10 GbE driver + * + * Copyright (c) 2007, 2010 International Business Machines + * Author: Andrew Tauferner <ataufer@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#include "bgp_emac.h" +#include "bgp_e10000.h" + + +/* XEMAC accessible through /proc/driver/e10000/xemac/hw. 
*/ +static E10000_PROC_ENTRY emac_hw_proc_entry[] = { + { "mode0", (void*) 0x00, NULL }, + { "mode1", (void*) 0x04, NULL }, + { "txMode0", (void*) 0x08, NULL }, + { "txMode1", (void*) 0x0c, NULL }, + { "rxMode", (void*) 0x10, NULL }, + { "interruptStatus", (void*) 0x14, NULL }, + { "interruptStatusEnable", (void*) 0x18, NULL }, + { "individualAddrH", (void*) 0x1c, NULL }, + { "individualAddrL", (void*) 0x20, NULL }, + { "vlanTPID", (void*) 0x24, NULL }, + { "vlanTCI", (void*) 0x28, NULL }, + { "pauseTimerValue", (void*) 0x2c, NULL }, + { "individualAddrHashTable0", (void*) 0x30, NULL }, + { "individualAddrHashTable1", (void*) 0x34, NULL }, + { "individualAddrHashTable2", (void*) 0x38, NULL }, + { "individualAddrHashTable3", (void*) 0x3c, NULL }, + { "groupAddrHashTable0", (void*) 0x40, NULL }, + { "groupAddrHashTable1", (void*) 0x44, NULL }, + { "groupAddrHashTable2", (void*) 0x48, NULL }, + { "groupAddrHashTable3", (void*) 0x4c, NULL }, + { "lastSourceAddrH", (void*) 0x50, NULL }, + { "lastSourceAddrL", (void*) 0x54, NULL }, + { "interPacketGapValue", (void*) 0x58, NULL }, + { "staCtrl", (void*) 0x5c, NULL }, + { "txRequestThreshold", (void*) 0x60, NULL }, + { "rxLowHighWaterMark", (void*) 0x64, NULL }, + { "sopCommandMode", (void*) 0x68, NULL }, + { "secondaryIndividualAddrH", (void*) 0x6c, NULL }, + { "secondaryIndividualAddrL", (void*) 0x70, NULL }, + { "txOctetsCounter1", (void*) 0x74, NULL }, + { "txOctetsCounter2", (void*) 0x78, NULL }, + { "rxOctetsCounter1", (void*) 0x7c, NULL }, + { "rxOctetsCounter2", (void*) 0x80, NULL }, + { "revisionID", (void*) 0x84, NULL }, + { "hwDebug", (void*) 0x88, NULL }, + { NULL, 0, NULL } +}; + + + +static irqreturn_t emac_irq(int irq, + void* data) +{ + struct net_device* netDev = (struct net_device*) data; + EMAC* emac = (EMAC*) netdev_priv(netDev); + U32 isr = in_be32(&emac->regs->interruptStatus); + irqreturn_t rc = IRQ_NONE; + + if (irq == netDev->irq) { + if ((isr & XEMAC_IS_TXPE) || (isr & XEMAC_IS_DB) || (isr & 
XEMAC_IS_TE)) { + rc = IRQ_HANDLED; + emac->stats.tx_errors++; + } + if (isr & XEMAC_IS_RXPE) { + rc = IRQ_HANDLED; + emac->stats.rx_errors++; + } + if (isr & XEMAC_IS_TFEI) { + rc = IRQ_HANDLED; + emac->stats.tx_errors++; + emac->stats.tx_fifo_errors++; + } + if (isr & XEMAC_IS_RFFI) { + rc = IRQ_HANDLED; + emac->stats.rx_errors++; + emac->stats.rx_over_errors++; + } + if (isr & XEMAC_IS_OVR) { + rc = IRQ_HANDLED; + emac->stats.rx_errors++; + emac->stats.rx_over_errors++; + } + if ((isr & XEMAC_IS_PSF) || (isr & XEMAC_IS_RTF) || (isr & XEMAC_IS_IRE)) { /* pause or runt frame or in range error? */ + rc = IRQ_HANDLED; + } + if (isr & XEMAC_IS_BDF) { + rc = IRQ_HANDLED; + emac->stats.rx_errors++; + emac->stats.rx_frame_errors++; + } + if (isr & XEMAC_IS_LF) { + rc = IRQ_HANDLED; + emac->stats.rx_errors++; + } + if (isr & XEMAC_IS_BFCS) { + rc = IRQ_HANDLED; + emac->stats.rx_errors++; + emac->stats.rx_crc_errors++; + } + if ((isr & XEMAC_IS_FTL) || (isr & XEMAC_IS_ORE)) { + rc = IRQ_HANDLED; + emac->stats.rx_errors++; + emac->stats.rx_length_errors++; + } + + out_be32(&emac->regs->interruptStatus, isr); + } + + if (rc != IRQ_HANDLED) + e10000_printr(bg_subcomp_xemac, emac_ras_irq_unknown, + "Spurious interrupt - irq=%d, isr=0x%08x.", irq, isr); + + return rc; +} + +int __init emac_init(void* devMapAddr, + EMAC* emac, + U32 type, + TOMAL* tomal, + U8 channel, + struct net_device* netDev, + struct proc_dir_entry* procDir) +{ + int rc = -EINVAL; + + PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p, type=%d, tomal=%p, netDev=%p\n", emac, type, + tomal, netDev); + + emac->type = type; + switch (type) { + case EMAC_TYPE_XEMAC: { + emac->regs = (XEMACRegs*) devMapAddr; + if (!emac->regs) { + e10000_printr(bg_subcomp_xemac, emac_ras_ioremap_error, + "Failure mapping XEMAC registers."); + rc = -ENXIO; + goto out; + } + + /* Create /proc/driver/e10000/xemac/hw */ + if (procDir) { + emac->parentDir = procDir; + emac->emacDir = proc_mkdir("xemac", procDir); + if (emac->emacDir) { + 
emac->hwDir = proc_mkdir("hw", emac->emacDir); + if (emac->hwDir) { + E10000_PROC_ENTRY* entry = emac_hw_proc_entry; + + while (entry->name) { + entry->entry = e10000_create_proc_entry(emac->hwDir, entry->name, + (void*) ((U32) emac->regs + (U32) entry->addr)); + if (!entry->entry) + printk(KERN_EMERG "Failure creating /proc entry %s\n", entry->name); + + entry++; + } + } + } + } + break; + } + + default: + e10000_printr(bg_subcomp_xemac, e10000_ras_internal_error, + "Invalid EMAC type [%d].", type); + goto out; + } + +#ifndef CONFIG_BGP_E10000_EMAC_LOOPBACK + /* Initialize the PHY. */ + emac->phy.phy_id = 0; + emac->phy.full_duplex = 1; + emac->phy.dev = netDev; +#endif + + /* Request IRQ. */ + rc = request_irq(netDev->irq, emac_irq, IRQF_DISABLED, "BGP EMAC IRQ", (void*) netDev); + if (rc) { + e10000_printr(bg_subcomp_xemac, emac_ras_irq_not_available, + "Failure requesting IRQ [%d] - rc = %d", netDev->irq, rc); + goto out; + } + + emac->tomal = tomal; + emac->channel = channel; + emac->netDev = netDev; + memset(&emac->stats, 0, sizeof(emac->stats)); + spin_lock_init(&emac->lock); + emac->opened = 0; + + goto out; + +out: + PRINTK(DBG_EMAC | DBG_LEVEL2, "exit rc=%d\n", rc); + + return rc; +} + + +int emac_configure(EMAC* emac) +{ + int rc = 0; + + PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac); + + switch (emac->type) { + case EMAC_TYPE_XEMAC: { + XEMACRegs* reg = (XEMACRegs*) emac->regs; + U32 mode1 = XEMAC_MODE1_TRQ | XEMAC_MODE1_RFS8K | + XEMAC_MODE1_TFS8K | XEMAC_MODE1_JBEN | + XEMAC_MODE1_PSEN | XEMAC_MODE1_IFEN | + XEMAC_MODE1_OPB133MHZ | 0x00001000; + U32 rxMode = XEMAC_RX_MODE_SPAD | XEMAC_RX_MODE_SFCS | XEMAC_RX_MODE_PMME | + XEMAC_RX_MODE_MAE | XEMAC_RX_MODE_IAE | XEMAC_RX_MODE_BAE | XEMAC_RX_MODE_LFD | + XEMAC_RX_MODE_RFAF_16_32; + + /* We must accept multicast frames so that pause frames aren't discarded. */ + /* This means that EMAC must have multicast mode enabled and promiscuous multicast */ + /* mode enabled. 
*/ + if (emac->netDev->flags & IFF_PROMISC) + rxMode |= XEMAC_RX_MODE_PME; + out_be32(®->rxMode, rxMode); + out_be32(®->rxLowHighWaterMark, 0x00800100); + out_be32(®->pauseTimerValue, 0x1000); + +#ifdef CONFIG_BGP_E10000_EMAC_LOOPBACK + mode1 |= XEMAC_MODE1_LPEN; +#else + mode1 |= XEMAC_MODE1_SDR; +#endif + out_be32(®->mode1, mode1); + out_be32(®->txMode1, 0x02200240); + out_be32(®->txRequestThreshold, 0x17000000); + break; + } + } + + PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + +void emac_exit(EMAC* emac) +{ + PRINTK(DBG_EMAC | DBG_LEVEL2, "entry\n"); + + /* Remove /proc entries. */ + if (emac->emacDir) { + if (emac->hwDir) { + E10000_PROC_ENTRY* entry = emac_hw_proc_entry; + + while (entry->name) { + if (entry->entry) { + remove_proc_entry(entry->entry->name, emac->emacDir); + entry->entry = NULL; + } + entry++; + } + + remove_proc_entry(emac->hwDir->name, emac->emacDir); + emac->hwDir = NULL; + } + remove_proc_entry(emac->emacDir->name, emac->parentDir); + emac->emacDir = NULL; + } + + /* Free the IRQ. */ + free_irq(emac->netDev->irq, (void*) emac->netDev); + + PRINTK(DBG_EMAC | DBG_LEVEL2, "exit\n"); + + return; +} diff --git a/drivers/net/bgp_e10000/bgp_emac.h b/drivers/net/bgp_e10000/bgp_emac.h new file mode 100644 index 00000000000000..3072b127ad621d --- /dev/null +++ b/drivers/net/bgp_e10000/bgp_emac.h @@ -0,0 +1,356 @@ +/* + * bgp_emac.h: XEMAC definition for BlueGene/P 10 GbE driver + * + * Copyright (c) 2007, 2010 International Business Machines + * Author: Andrew Tauferner <ataufer@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + +#ifndef _BGP_EMAC_H +#define _BGP_EMAC_H + +#include <linux/netdevice.h> +#include <linux/mii.h> +#include <linux/delay.h> +#include <linux/crc32.h> +#include <linux/proc_fs.h> +#include <asm/bluegene.h> +#include <asm/bluegene_ras.h> + +#include "bgp_tomal.h" +#include "bgp_e10000.h" + + +#define XEMAC_IRQ_GROUP 9 +#define XEMAC_IRQ_GINT 0 +#define XEMAC_IRQ bic_hw_to_irq(XEMAC_IRQ_GROUP, XEMAC_IRQ_GINT) + +#define XEMAC_BASE_ADDRESS 0x720004000ULL + + + +typedef volatile struct _XEMACRegs { /* Offset Description */ + U32 mode0; /* 00 mode register 0 */ +#define XEMAC_MODE0_RXIDL 0x80000000 +#define XEMAC_MODE0_TXIDL 0x40000000 +#define XEMAC_MODE0_SRST 0x20000000 +#define XEMAC_MODE0_TXEN 0x10000000 +#define XEMAC_MODE0_RXEN 0x08000000 +#define XEMAC_MODE0_WUEN 0x04000000 + U32 mode1; /* 04 mode register 1 */ +#define XEMAC_MODE1_SDR 0x80000000 +#define XEMAC_MODE1_LPEN 0x40000000 +#define XEMAC_MODE1_VLEN 0x20000000 +#define XEMAC_MODE1_IFEN 0x10000000 +#define XEMAC_MODE1_PSEN 0x08000000 +#define XEMAC_MODE1_RFS2K 0x00100000 +#define XEMAC_MODE1_RFS4K 0x00180000 +#define XEMAC_MODE1_RFS8K 0x00200000 +#define XEMAC_MODE1_RFS16K 0x00280000 +#define XEMAC_MODE1_RFS32K 0x00300000 +#define XEMAC_MODE1_RFS64K 0x00380000 +#define XEMAC_MODE1_TFS2K 0x00020000 +#define XEMAC_MODE1_TFS4K 0x00030000 +#define XEMAC_MODE1_TFS8K 0x00040000 +#define XEMAC_MODE1_TFS16K 0x00050000 +#define XEMAC_MODE1_TFS32K 0x00060000 +#define XEMAC_MODE1_TRQ 0x00008000 +#define XEMAC_MODE1_JBEN 0x00000800 +#define XEMAC_MODE1_OPB66MHZ 0x00000008 +#define XEMAC_MODE1_OPB83MHZ 0x00000010 +#define XEMAC_MODE1_OPB100MHZ 0x00000018 +#define XEMAC_MODE1_OPB133MHZ 0x00000020 + U32 txMode0; /* 08 TX mode register 0 */ +#define XEMAC_TX_MODE0_GNP 0x80000000 +#define XEMAC_TX_MODE0_TFAE_2_4 0x00000001 +#define XEMAC_TX_MODE0_TFAE_4_8 0x00000002 +#define XEMAC_TX_MODE0_TFAE_8_16 0x00000003 +#define XEMAC_TX_MODE0_TFAE_16_32 0x00000004 +#define XEMAC_TX_MODE0_TFAE_32_64 0x00000005 +#define 
XEMAC_TX_MODE0_TFAE_64_128 0x00000006 +#define XEMAC_TX_MODE0_TFAE_128_256 0x00000007 + U32 txMode1; /* 0C TX mode register 1 */ + U32 rxMode; /* 10 RX mode register */ +#define XEMAC_RX_MODE_SPAD 0x80000000 +#define XEMAC_RX_MODE_SFCS 0x40000000 +#define XEMAC_RX_MODE_ARRF 0x20000000 +#define XEMAC_RX_MODE_ARFE 0x10000000 +#define XEMAC_RX_MODE_LFD 0x08000000 +#define XEMAC_RX_MODE_ARIE 0x04000000 +#define XEMAC_RX_MODE_PPF 0x02000000 +#define XEMAC_RX_MODE_PME 0x01000000 +#define XEMAC_RX_MODE_PMME 0x00800000 +#define XEMAC_RX_MODE_IAE 0x00400000 +#define XEMAC_RX_MODE_MIAE 0x00200000 +#define XEMAC_RX_MODE_BAE 0x00100000 +#define XEMAC_RX_MODE_MAE 0x00080000 +#define XEMAC_RX_MODE_PUME 0x00040000 +#define XEMAC_RX_MODE_SIAE 0x00020000 +#define XEMAC_RX_MODE_RFAF_2_4 0x00000001 +#define XEMAC_RX_MODE_RFAF_4_8 0x00000002 +#define XEMAC_RX_MODE_RFAF_8_16 0x00000003 +#define XEMAC_RX_MODE_RFAF_16_32 0x00000004 +#define XEMAC_RX_MODE_RFAF_32_64 0x00000005 +#define XEMAC_RX_MODE_RFAF_64_128 0x00000006 + U32 interruptStatus; /* 14 interrupt status register */ +#define XEMAC_IS_TXPE 0x20000000 +#define XEMAC_IS_RXPE 0x10000000 +#define XEMAC_IS_TFEI 0x08000000 +#define XEMAC_IS_RFFI 0x04000000 +#define XEMAC_IS_OVR 0x02000000 +#define XEMAC_IS_PSF 0x01000000 +#define XEMAC_IS_BDF 0x00800000 +#define XEMAC_IS_RTF 0x00400000 +#define XEMAC_IS_LF 0x00200000 +#define XEMAC_IS_BFCS 0x00080000 +#define XEMAC_IS_FTL 0x00040000 +#define XEMAC_IS_ORE 0x00020000 +#define XEMAC_IS_IRE 0x00010000 +#define XEMAC_IS_DB 0x00000100 +#define XEMAC_IS_TE 0x00000040 +#define XEMAC_IS_MMS 0x00000002 +#define XEMAC_IS_MMF 0x00000001 + U32 interruptStatusEnable; /* 18 interrupt status enable register */ + U32 individualAddrH; /* 1C bits 0-15 of main station unique address */ + U32 individualAddrL; /* 20 bits 16-47 of main station unique address */ + U32 vlanTPID; /* 24 VLAN tag ID */ + U32 vlanTCI; /* 28 VLAN TCI register */ + U32 pauseTimerValue; /* 2C pause timer register */ + U32 
individualAddrHashTable[4]; /* 30 individual addr. hash registers */ + U32 groupAddrHashTable[4]; /* 40 group addr. hash register 1 */ + U32 lastSourceAddrH; /* 50 bits 0-15 of last source address */ + U32 lastSourceAddrL; /* 54 bits 16-47 of last source address */ + U32 interPacketGapValue; /* 58 inter packet gap register */ + U32 staCtrl; /* 5C STA control register */ +#define XEMAC_STAC_MGO 0x00008000 +#define XEMAC_STAC_PHE 0x00004000 +#define XEMAC_STAC_IM 0x00002000 +#define XEMAC_STAC_MII_READ 0x00001000 +#define XEMAC_STAC_MII_WRITE 0x00000800 +#define XEMAC_STAC_MDIO_ADDRESS 0x00002000 +#define XEMAC_STAC_MDIO_WRITE 0x00002800 +#define XEMAC_STAC_MDIO_READ 0x00003800 +#define XEMAC_STAC_MDIO_READ_INC 0x00003000 + U32 txRequestThreshold; /* 60 TX request threshold register */ +#define XEMAC_TRT_64 0x00000000 +#define XEMAC_TRT_128 0x01000000 +#define XEMAC_TRT_192 0x02000000 +#define XEMAC_TRT_256 0x03000000 + U32 rxLowHighWaterMark; /* 64 RX high/low water mark register */ + U32 sopCommandMode; /* 68 SOP command mode register */ + U32 secondaryIndividualAddrH; /* 6C bits 0-15 of sec. individual addr. reg */ + U32 secondaryIndividualAddrL; /* 70 bits 16-47 of sec. individual addr. 
reg */ + U32 txOctetsCounter1; /* 74 bits 0-31 of total TX octets (read first) */ + U32 txOctetsCounter2; /* 78 bits 32-63 of total TX octets (read last) */ + U32 rxOctetsCounter1; /* 7C bits 0-31 of total RX octets (read first) */ + U32 rxOctetsCounter2; /* 80 bits 32-63 of total RX octets (read last) */ + U32 revisionID; /* 84 revision ID */ + U32 hwDbg; /* 88 hardware debug register */ +} XEMACRegs; + + + + +typedef struct _EMAC { + U32 type; +#define EMAC_TYPE_EMAC4 4 +#define EMAC_TYPE_XEMAC 10 + XEMACRegs* regs; + TOMAL* tomal; + U8 channel; + struct mii_if_info phy; + struct net_device* netDev; + struct net_device_stats stats; + spinlock_t lock; + U8 opened; + struct proc_dir_entry* parentDir; + struct proc_dir_entry* emacDir; + struct proc_dir_entry* hwDir; + +} EMAC; + + +typedef enum { + emac_ras_none = 0x00, + emac_ras_timeout = 0x01, + emac_ras_ioremap_error = 0x02, + emac_ras_irq_not_available = 0x03, + emac_ras_sta_addr_error = 0x04, + emac_ras_sta_read_error = 0x05, + emac_ras_sta_write_error = 0x06, + emac_ras_irq_unknown = 0x07, + + emac_ras_internal_error = 0xfe, + emac_ras_max = 0xff +} emac_ras_id; + +typedef enum { + phy_ras_none = 0x00, + phy_ras_timeout = 0x01, + phy_ras_not_found = 0x02, + + phy_ras_max = 0xff +} phy_ras_id; + + +int __init emac_init(void* devMapAddr, + EMAC* emac, + U32 type, + TOMAL* tomal, + U8 channel, + struct net_device* netDev, + struct proc_dir_entry* procDir); + +int emac_configure(EMAC* emac); + + + + +static inline int emac_soft_reset(EMAC* emac) +{ + int rc = 0; + U32 i; + + PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac); + + /* Set the reset bit and wait for it to clear. 
*/ + out_be32(&emac->regs->mode0, XEMAC_MODE0_SRST); + for (i = 200; (in_be32(&emac->regs->mode0) & XEMAC_MODE0_SRST) && i; i--) + udelay(10000); + if (!i) { + e10000_printr(bg_subcomp_xemac, emac_ras_timeout, + "XEMAC failed reset"); + rc = -ETIME; + } + + return rc; +} + + + +static inline int emac_rx_enable(EMAC* emac) +{ + U32 reg = in_be32(&emac->regs->mode0); + + out_be32(&emac->regs->mode0, reg | XEMAC_MODE0_RXEN); + + return 0; +} + + +static inline int emac_rx_disable(EMAC* emac) +{ + U32 reg = in_be32(&emac->regs->mode0); + + out_be32(&emac->regs->mode0, reg & ~XEMAC_MODE0_RXEN); + + return 0; +} + + +static inline int emac_tx_enable(EMAC* emac) +{ + U32 reg = in_be32(&emac->regs->mode0); + + out_be32(&emac->regs->mode0, reg | XEMAC_MODE0_TXEN); + reg = in_be32(&emac->regs->txMode0); + out_be32(&emac->regs->txMode0, reg | XEMAC_TX_MODE0_GNP); + + return 0; +} + + +static inline int emac_tx_disable(EMAC* emac) +{ + U32 reg = in_be32(&emac->regs->mode0); + + out_be32(&emac->regs->mode0, reg & ~XEMAC_MODE0_TXEN); + + return 0; +} + +static inline int emac_irq_enable(EMAC* emac) +{ + out_be32(&emac->regs->interruptStatusEnable, XEMAC_IS_TXPE | XEMAC_IS_RXPE | + XEMAC_IS_TFEI | XEMAC_IS_RFFI | XEMAC_IS_OVR | XEMAC_IS_BDF | + XEMAC_IS_RTF | XEMAC_IS_LF | XEMAC_IS_BFCS | XEMAC_IS_FTL | + XEMAC_IS_ORE | XEMAC_IS_IRE | XEMAC_IS_DB | XEMAC_IS_TE); + + return 0; +} + +static inline int emac_irq_disable(EMAC* emac) +{ + out_be32(&emac->regs->interruptStatusEnable, 0); + + return 0; +} + +static inline int emac_set_mac_address(EMAC* emac) +{ + int rc = 0; + + PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac); + + switch (emac->type) { + case EMAC_TYPE_XEMAC: { + XEMACRegs* reg = (XEMACRegs*) emac->regs; + struct net_device* netDev = emac->netDev; + + out_be32(®->individualAddrH, netDev->dev_addr[0] << 8 | + netDev->dev_addr[1]); + out_be32(®->individualAddrL, netDev->dev_addr[2] << 24 | + netDev->dev_addr[3] << 16 | netDev->dev_addr[4] << 8 | + 
netDev->dev_addr[5]); + break; + } + } + + PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +static inline int emac_set_multicast_list(EMAC* emac) +{ + int rc = 0; + XEMACRegs* regs = (XEMACRegs*) emac->regs; + + PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac); + + if (emac->netDev->flags & IFF_MULTICAST && + emac->netDev->mc_count > 0) { + U16 groupAddrHashTable[4] = {0, 0, 0, 0}; + struct dev_mc_list* dmi; + + for (dmi = emac->netDev->mc_list; dmi; dmi = dmi->next) { + U32 crc = ether_crc(6, (char*) dmi->dmi_addr); + U32 bit = 63 - (crc >> 26); + + groupAddrHashTable[bit >> 4] |= + 0x8000 >> (bit & 0x0f); + } + regs->groupAddrHashTable[0] = groupAddrHashTable[0]; + regs->groupAddrHashTable[1] = groupAddrHashTable[1]; + regs->groupAddrHashTable[2] = groupAddrHashTable[2]; + regs->groupAddrHashTable[3] = groupAddrHashTable[3]; + } + + PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +void emac_exit(EMAC* emac); + + + + +#endif diff --git a/drivers/net/bgp_e10000/bgp_tomal.c b/drivers/net/bgp_e10000/bgp_tomal.c new file mode 100644 index 00000000000000..4878c8ffb92f07 --- /dev/null +++ b/drivers/net/bgp_e10000/bgp_tomal.c @@ -0,0 +1,1892 @@ +/* + * bgp_tomal.c: TOMAL device for BlueGene/P 10 GbE driver + * + * Copyright (c) 2007, 2010 International Business Machines + * Author: Andrew Tauferner <ataufer@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify i + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ + + + +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/dma-mapping.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/tcp.h> +#include <linux/udp.h> + + +#include <asm/bluegene_ras.h> +#include <asm/udbg.h> + +#include "bgp_e10000.h" +#include "bgp_tomal.h" +#include "bgp_emac.h" + + +static RxDescSegment* tomal_alloc_rx_segment(U32 numDescriptors); +static void tomal_free_rx_segment(RxDescSegment* segment); +static TxDescSegment* tomal_alloc_tx_segment(U32 numDescriptors); +static void tomal_free_tx_segment(TxDescSegment* segment); +static irqreturn_t tomal_irq0(int irq, void* data); +static irqreturn_t tomal_irq1(int irq, void* data); + + +/* TOMAL hardware accessible through /proc/driver/e10000/tomal/hw */ +static E10000_PROC_ENTRY tomal_hw_proc_entry[] = { + { "configurationCtrl", (void*) 0x0000, NULL }, + { "revisionID", (void*) 0x0060, NULL }, + { "packetDataEngineCtrl", (void*) 0x0400, NULL }, + { "txNotificationCtrl", (void*) 0x0600, NULL }, + { "txMinTimer", (void*) 0x0610, NULL }, + { "txMaxTimer", (void*) 0x0620, NULL }, + { "txMaxFrameNum0", (void*) 0x06c0, NULL }, + { "txMaxFrameNum1", (void*) 0x07c0, NULL }, + { "txMinFrameNum0", (void*) 0x06d0, NULL }, + { "txMinFrameNum1", (void*) 0x07d0, NULL }, + { "txFramePerServiceCtrl", (void*) 0x0650, NULL }, + { "txHWCurrentDescriptorAddrH0", (void*) 0x0660, NULL }, + { "txHWCurrentDescriptorAddrH1", (void*) 0x0760, NULL }, + { "txHWCurrentDescriptorAddrL0", (void*) 0x0670, NULL }, + { "txHWCurrentDescriptorAddrL1", (void*) 0x0770, NULL }, + { "txPendingFrameCount0", (void*) 0x0690, NULL }, + { "txPendingFrameCount1", (void*) 0x0790, NULL }, + { "txAddPostedFrames0", (void*) 0x06a0, NULL }, + { "txAddPostedFrames1", (void*) 0x07a0, NULL }, + { 
"txNumberOfTransmittedFrames0", (void*) 0x06b0, NULL }, + { "txNumberOfTransmittedFrames1", (void*) 0x07b0, NULL }, + { "txEventStatus0", (void*) 0x06e0, NULL }, + { "txEventStatus1", (void*) 0x07e0, NULL }, + { "txEventMask0", (void*) 0x06f0, NULL }, + { "txEventMask1", (void*) 0x07f0, NULL }, + { "rxNotificationCtrl", (void*) 0x0f00, NULL }, + { "rxMinTimer", (void*) 0x0f10, NULL }, + { "rxMaxTimer", (void*) 0x0f20, NULL }, + { "rxMaxFrameNum0", (void*) 0x1080, NULL }, + { "rxMaxFrameNum1", (void*) 0x1180, NULL }, + { "rxMinFrameNum0", (void*) 0x1090, NULL }, + { "rxMinFrameNum1", (void*) 0x1190, NULL }, + { "rxHWCurrentDescriptorAddrH0", (void*) 0x1020, NULL }, + { "rxHWCurrentDescriptorAddrH1", (void*) 0x1120, NULL }, + { "rxHWCurrentDescriptorAddrL0", (void*) 0x1030, NULL }, + { "rxHWCurrentDescriptorAddrL1", (void*) 0x1130, NULL }, + { "rxAddFreeBytes0", (void*) 0x1040, NULL }, + { "rxAddFreeBytes1", (void*) 0x1140, NULL }, + { "rxTotalBuffersSize0", (void*) 0x1050, NULL }, + { "rxTotalBuffersSize1", (void*) 0x1150, NULL }, + { "rxNumberOfReceivedFrames0", (void*) 0x1060, NULL }, + { "rxNumberOfReceivedFrames1", (void*) 0x1160, NULL }, + { "rxDroppedFramesCount0", (void*) 0x1070, NULL }, + { "rxDroppedFramesCount1", (void*) 0x1170, NULL }, + { "rxEventStatus0", (void*) 0x10a0, NULL }, + { "rxEventStatus1", (void*) 0x11a0, NULL }, + { "rxEventMask0", (void*) 0x10b0, NULL }, + { "rxEventMask1", (void*) 0x11b0, NULL }, + { "softwareNonCriticalErrorsStatus0", (void*) 0x1800, NULL }, + { "softwareNonCriticalErrorsStatus1", (void*) 0x1900, NULL }, + { "softwareNonCriticalErrorsEnable0", (void*) 0x1810, NULL }, + { "softwareNonCriticalErrorsEnable1", (void*) 0x1910, NULL }, + { "softwareNonCriticalErrorsMask0", (void*) 0x1820, NULL }, + { "softwareNonCriticalErrorsMask1", (void*) 0x1920, NULL }, + { "receiveDataBufferSpace", (void*) 0x1900, NULL }, + { "transmitDataBuffer0FreeSpace", (void*) 0x1910, NULL }, + { "transmitDataBuffer1FreeSpace", (void*) 0x1920, NULL }, 
+ { "rxMACStatus0", (void*) 0x1b20, NULL }, + { "rxMACStatus1", (void*) 0x1c20, NULL }, + { "rxMACStatusEnable0", (void*) 0x1b30, NULL }, + { "rxMACStatusEnable1", (void*) 0x1c30, NULL }, + { "rxMACStatusMask0", (void*) 0x1b40, NULL }, + { "rxMACStatusMask1", (void*) 0x1c40, NULL }, + { "txMACStatus0", (void*) 0x1b50, NULL }, + { "txMACStatus1", (void*) 0x1c50, NULL }, + { "txMACStatusEnable0", (void*) 0x1b60, NULL }, + { "txMACStatusEnable1", (void*) 0x1c60, NULL }, + { "txMACStatusMask0", (void*) 0x1b70, NULL }, + { "txMACStatusMask1", (void*) 0x1c70, NULL }, + { "hardwareErrorsStatus", (void*) 0x1e00, NULL }, + { "hardwareErrorsEnable", (void*) 0x1e10, NULL }, + { "hardwareErrorsMask", (void*) 0x1e20, NULL }, + { "softwareCriticalErrorsStatus", (void*) 0x1f00, NULL }, + { "softwareCriticalErrorsEnable", (void*) 0x1f10, NULL }, + { "softwareCriticalErrorsMask", (void*) 0x1f20, NULL }, + { "receiveDescriptorBadCodeFEC", (void*) 0x1f30, NULL }, + { "transmitDescriptorBadCodeFEC", (void*) 0x1f40, NULL }, + { "interruptStatus", (void*) 0x1f80, NULL }, + { "interruptRoute", (void*) 0x1f90, NULL }, + { "rxMACBadStatusCounter0", (void*) 0x2060, NULL }, + { "rxMACBadStatusCounter1", (void*) 0x2160, NULL }, + { "debugVectorsCtrl", (void*) 0x3000, NULL }, + { "debugVectorsReadData", (void*) 0x3010, NULL }, + { NULL, (void*) 0, NULL } +}; + + +/* TOMAL software accessible through /proc/driver/e10000/tomal/sw */ +static E10000_PROC_ENTRY tomal_sw_proc_entry[] = { + { "rxMaxBuffers0", NULL, NULL }, + { "rxMaxBuffers1", NULL, NULL }, + { "rxBufferSize0", NULL, NULL }, + { "rxBufferSize1", NULL, NULL }, + { "rxDescSegmentAddr0", NULL, NULL }, + { "rxDescSegmentAddr1", NULL, NULL }, + { "rxOldDescSegmentAddr0", NULL, NULL }, + { "rxOldDescSegmentAddr1", NULL, NULL }, + { "txMaxBuffers0", NULL, NULL }, + { "txMaxBuffers1", NULL, NULL }, + { "txPendingBuffers0", NULL, NULL }, + { "txPendingBuffers1", NULL, NULL }, + { "txNumberOfTransmittedFrames0", NULL, NULL }, + { 
"txNumberOfTransmittedFrames1", NULL, NULL }, + { "txDescSegmentAddr0", NULL, NULL }, + { "txDescSegmentAddr1", NULL, NULL }, + { "txOldDescSegmentAddr0", NULL, NULL }, + { "txOldDescSegmentAddr1", NULL, NULL }, + { "txFreeDescSegmentAddr0", NULL, NULL }, + { "txFreeDescSegmentAddr1", NULL, NULL }, + { "irq0", NULL, NULL }, + { "irq1", NULL, NULL }, + { "numberOfNetrxDrops", NULL, NULL }, + { "numberOfHwDrops0", NULL, NULL }, + { "numberOfHwDrops1", NULL, NULL }, + { "numberOfNotLast", NULL, NULL }, +/* { "txChecksumNONE", NULL, NULL }, */ +/* { "txChecksumPARTIAL", NULL, NULL }, */ +/* { "txChecksumUNNECESSARY", NULL, NULL }, */ +/* { "txChecksumCOMPLETE", NULL, NULL }, */ + { NULL, NULL, NULL } +}; + + +/* Allocate a single Rx descriptor segment with the specified number of descriptors. */ +static RxDescSegment* tomal_alloc_rx_segment(U32 numDescriptors) +{ + RxDescSegment* segment = NULL; + RxDesc* desc; + size_t size = numDescriptors * sizeof(RxDesc) + sizeof(BranchDesc); + dma_addr_t dmaHandle; + + /* Allocate descriptor storage. */ + desc = (RxDesc*) dma_alloc_coherent(NULL, size, &dmaHandle, GFP_KERNEL); + if (desc) { + /* Clear the descriptors. */ + memset((void*) desc, 0, size); + + /* Allocate a segment. */ + segment = kmalloc(sizeof(RxDescSegment), GFP_KERNEL); + if (segment) { + segment->size = size; + segment->dmaHandle = dmaHandle; + segment->desc = desc; + + segment->branchDesc = (BranchDesc*) &desc[numDescriptors]; + segment->branchDesc->code = TOMAL_BRANCH_CODE; + segment->branchDesc->reserved = segment->branchDesc->nextDescAddrH = 0; + segment->branchDesc->nextDescAddrL = (U32) NULL; + + /* Allocate storage for buffer pointers. 
*/ + segment->skb = (struct sk_buff**) + kmalloc(numDescriptors * sizeof(struct sk_buff*) + + sizeof(struct sk_buff*), GFP_KERNEL); + if (!segment->skb) { + kfree((void*) segment); + segment = NULL; + dma_free_coherent(NULL, size, (void*) desc, dmaHandle); + } else { + memset((void*) segment->skb, 0, + numDescriptors * sizeof(struct sk_buff*) + sizeof(struct sk_buff*)); + segment->currDesc = segment->desc; + segment->currSkb = segment->skb; + segment->next = segment; + } + } else + dma_free_coherent(NULL, size, (void*) desc, dmaHandle); + } + + return segment; +} + + +/* Allocate descriptor segment(s) until the specified number of Rx descriptors have been */ +/* created. */ +int tomal_alloc_rx_segments(TOMAL* tomal, + U8 channel, + U32 totalDescriptors) +{ + RxDescSegment* firstSegment = (RxDescSegment*) NULL; + RxDescSegment* prevSegment = (RxDescSegment*) NULL; + RxDescSegment* segment = (RxDescSegment*) NULL; + U32 numDescriptors = totalDescriptors; + U8 first = 1; + int rc; + + /* Allocate RX segments until the indicated number of descriptors have been */ + /* created. */ + while (totalDescriptors && numDescriptors >= 1) { + /* Allocate an RX descriptor segment. */ + segment = tomal_alloc_rx_segment(numDescriptors); + if (segment) { + /* If this was the first segment then remember it. */ + if (first) { + firstSegment = prevSegment = segment; + first = 0; + } + + /* Link the previous segment to the new segment. */ + prevSegment->branchDesc->nextDescAddrL = (U32) segment->dmaHandle; + prevSegment->next = segment; + + totalDescriptors -= numDescriptors; + } else { + /* Failure allocating a segment of the requested size. Reduce the size. */ + numDescriptors /= 2; + } + } + + /* All segments created? */ + if (!segment) { + RxDescSegment* nextSegment = NULL; + + /* Free any segments that were allocated. 
*/ + segment = prevSegment = firstSegment; + while (segment) { + nextSegment = segment->next; + BUG_ON(nextSegment == segment); + + tomal_free_rx_segment(segment); + + segment = nextSegment; + } + tomal->rxDescSegment[channel] = (RxDescSegment*) NULL; + + e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error, + "Failure allocating RX descriptor segment - totalDescriptors=%d.", + totalDescriptors); + rc = -ENOMEM; + } else { + /* Link the last segment to the first. */ + segment->branchDesc->nextDescAddrL = (U32) firstSegment->dmaHandle; + segment->next = firstSegment; + + tomal->rxDescSegment[channel] = segment; + rc = 0; + } + + /* Update TOMAL's view of the RX descriptors. */ + out_be32(&tomal->regs[channel]->rxHWCurrentDescriptorAddrH, 0); + out_be32(&tomal->regs[channel]->rxHWCurrentDescriptorAddrL, + (U32) tomal->rxDescSegment[channel]->dmaHandle); + + tomal->oldRxSegment[channel] = tomal->rxDescSegment[channel]; + tomal->oldRxSegment[channel]->currDesc = tomal->oldRxSegment[channel]->desc; + tomal->oldRxSegment[channel]->currSkb = tomal->oldRxSegment[channel]->skb; + + return rc; +} + + +/* Free the specified Rx descriptor segment. */ +static void tomal_free_rx_segment(RxDescSegment* segment) +{ + RxDesc* desc; + struct sk_buff** skb; + + /* Look for any descriptors awaiting processing. */ + for (desc = segment->desc, skb = segment->skb; + desc && desc != (RxDesc*) segment->branchDesc; desc++, skb++) { + if (*skb) { + dma_unmap_single(NULL, desc->buffHeadAddrL, + desc->postedLength, DMA_FROM_DEVICE); + dev_kfree_skb_any(*skb); + *skb = NULL; + } + + desc->postedLength = 0; + } + + /* Free SKB pointer storage. */ + if (segment->skb) + kfree(segment->skb); + + /* Free the descriptor storage. */ + if (segment->desc) + dma_free_coherent(NULL, segment->size, (void*) segment->desc, segment->dmaHandle); + + /* Free the segment. */ + kfree((void*) segment); + + return; +} + + +/* Free all Rx descriptor segments. 
*/ +void tomal_free_rx_segments(TOMAL* tomal, + U8 channel) +{ + RxDescSegment* segment = tomal->rxDescSegment[channel]; + RxDescSegment* startSegment = segment; + RxDescSegment* nextSegment; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel); + + while (segment) { + nextSegment = segment->next; + + tomal_free_rx_segment(segment); + segment = nextSegment; + + if (segment == startSegment) + break; + } + tomal->rxDescSegment[channel] = NULL; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n"); + + return; +} + + +/* Free all Rx buffers. */ +int tomal_free_rx_buffers(TOMAL* tomal, + U8 channel) +{ + int rc = 0; + RxDescSegment* segment = tomal->rxDescSegment[channel]; + RxDescSegment* startSegment = segment; + RxDesc* desc; + struct sk_buff** skb; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel); + + while (segment) { + /* Look for any descriptors awaiting processing. */ + for (desc = segment->desc, skb = segment->skb; + desc != (RxDesc*) segment->branchDesc; desc++, skb++) { + if (*skb) { + dma_unmap_single(NULL, desc->buffHeadAddrL, + desc->postedLength, DMA_FROM_DEVICE); + dev_kfree_skb_any(*skb); + *skb = NULL; + } + + desc->postedLength = 0; + } + + segment = segment->next; + if (segment == startSegment) + break; + } + + /* Force TOMAL's total buffer size register back to zero. We do this by adding */ + /* enough buffer space to make this 20 bit register wrap around. */ + while (in_be32(&tomal->regs[channel]->rxTotalBufferSize) && + (0x00100000 - in_be32(&tomal->regs[channel]->rxTotalBufferSize)) > 0x0000ffff) + out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0xffff); + if (in_be32(&tomal->regs[channel]->rxTotalBufferSize)) + out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0x00100000 - in_be32(&tomal->regs[channel]->rxTotalBufferSize)); + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +/* Returns the number of RX buffers that are waiting to be processed. 
An error is indicated */ +/* by a negative value. The caller should be holding the TOMAL lock for the specified channel. */ +int tomal_pending_rx_buffers(TOMAL* tomal, + U8 channel) +{ + int rc = 0; + RxDescSegment* segment = tomal->rxDescSegment[channel]; + RxDescSegment* startSegment = segment; + RxDesc* desc; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel); + + do { + /* Look for any descriptors awaiting processing. */ + for (desc = segment->desc; desc != (RxDesc*) segment->branchDesc; desc++) + if ((desc->status & TOMAL_RX_LAST) && desc->totalFrameLength) + rc++; + + segment = segment->next; + } while (segment != startSegment); + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +/* Returns the number of TX buffers that are queued for transmission. An error is indicated */ +/* by a negative value. The caller should be holding the TOMAL TX lock for the specified channel. */ +int tomal_pending_tx_buffers(TOMAL* tomal, + U8 channel) +{ + int rc = 0; + TxDescSegment* segment = tomal->txDescSegment[channel]; + TxDescSegment* startSegment = segment; + TxDesc* desc; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel); + + do { + /* Look for any descriptors awaiting processing. */ + for (desc = segment->desc; desc != (TxDesc*) segment->branchDesc; desc++) + if (desc->postedLength) + rc++; + + segment = segment->next; + } while (segment != startSegment); + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +/* Allocate a Tx descriptor segment with the specified number of descriptors. */ +static TxDescSegment* tomal_alloc_tx_segment(U32 numDescriptors) +{ + TxDescSegment* segment = NULL; + TxDesc* desc; + size_t size = numDescriptors * sizeof(TxDesc) + sizeof(BranchDesc); + dma_addr_t dmaHandle; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - numDescriptors=%d\n", numDescriptors); + + /* Allocate descriptor storage. 
*/ + desc = (TxDesc*) dma_alloc_coherent(NULL, size, &dmaHandle, GFP_KERNEL); + if (desc) { + /* Clear the descriptor storage. */ + memset((void*) desc, 0, size); + + /* Allocate a segment. */ + segment = kmalloc(sizeof(TxDescSegment), GFP_KERNEL); + if (segment) { + segment->size = size; + segment->dmaHandle = dmaHandle; + segment->desc = desc; + + segment->branchDesc = (BranchDesc*) &segment->desc[numDescriptors]; + segment->branchDesc->code = TOMAL_BRANCH_CODE; + segment->branchDesc->reserved = segment->branchDesc->nextDescAddrH = 0; + segment->branchDesc->nextDescAddrL = (U32) NULL; + + /* Allocate storage for buffer pointers. */ + segment->skb = (struct sk_buff**) + kmalloc((numDescriptors+1) * sizeof(struct sk_buff*), GFP_KERNEL); + if (!segment->skb) { + kfree((void*) segment); + segment = NULL; + dma_free_coherent(NULL, size, (void*) segment->desc, segment->dmaHandle); + } else { + memset((void*) segment->skb, 0, + (numDescriptors+1) * sizeof(struct sk_buff*)); + segment->oldIndex = segment->freeIndex = 0; + segment->next = segment; /* by default point this segment at itself */ + } + } else + dma_free_coherent(NULL, size, (void*) desc, dmaHandle); + } + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - segment=%p\n", segment); + + return segment; +} + + +/* Allocate Tx descriptor segment(s) until the specified number of descriptors have been created. */ +int tomal_alloc_tx_segments(TOMAL* tomal, + U8 channel, + U32 totalDescriptors) +{ + TxDescSegment* firstSegment = (TxDescSegment*) NULL; + TxDescSegment* prevSegment = (TxDescSegment*) NULL; + TxDescSegment* segment = (TxDescSegment*) NULL; + U32 numDescriptors = totalDescriptors; + U8 first = 1; + int rc; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d, totalDescriptors=%d\n", tomal, + channel, totalDescriptors); + + /* Allocate TX segments until the indicated number of descriptors have been */ + /* created. 
*/ + while (totalDescriptors && numDescriptors >= 1) { + /* Allocate an TX descriptor segment. */ + segment = tomal_alloc_tx_segment(numDescriptors); + if (segment) { + /* If this was the first segment then remember it. */ + if (first) { + firstSegment = prevSegment = segment; + first = 0; + } + + /* Link the previous segment to the new segment. */ + prevSegment->branchDesc->nextDescAddrL = (U32) segment->dmaHandle; + prevSegment->next = segment; + + totalDescriptors -= numDescriptors; + } else { + /* Failure allocating a segment of the requested size. Reduce the size. */ + numDescriptors /= 2; + } + } + + /* All segments created? */ + if (!segment) { + TxDescSegment* nextSegment = NULL; + + /* Free any segments that were allocated. */ + segment = prevSegment = firstSegment; + while (segment) { + nextSegment = segment->next; + BUG_ON(nextSegment == segment); + + tomal_free_tx_segment(segment); + + segment = nextSegment; + } + tomal->txDescSegment[channel] = (TxDescSegment*) NULL; + + e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error, + "TX descriptor allocation failure - totalDescriptors=%d.", + totalDescriptors); + rc = -ENOMEM; + } else { + /* Link the last segment to the first. */ + segment->branchDesc->nextDescAddrL = (U32) firstSegment->dmaHandle; + segment->next = firstSegment; + + tomal->txDescSegment[channel] = segment; + rc = 0; + } + + /* Tell TOMAL where the descriptor storage is. 
*/ + out_be32(&tomal->regs[channel]->txHWCurrentDescriptorAddrH, 0); + out_be32(&tomal->regs[channel]->txHWCurrentDescriptorAddrL, + (U32) tomal->txDescSegment[channel]->dmaHandle); + tomal->pendingTxBuffers[channel] = 0; + tomal->oldTxSegment[channel] = tomal->freeTxSegment[channel] = tomal->txDescSegment[channel]; + tomal->freeTxSegment[channel]->freeIndex = tomal->freeTxSegment[channel]->oldIndex = + tomal->freeTxSegment[channel]->oldIndex = + tomal->numberOfTransmittedFrames[channel] = 0; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +/* Free all Tx descriptor segments. */ +void tomal_free_tx_segments(TOMAL* tomal, + U8 channel) +{ + TxDescSegment* segment = tomal->txDescSegment[channel]; + TxDescSegment* startSegment = segment; + TxDescSegment* nextSegment; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel); + + while (segment) { + nextSegment = segment->next; + + tomal_free_tx_segment(segment); + segment = nextSegment; + + if (segment == startSegment) + break; + } + tomal->txDescSegment[channel] = NULL; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n"); + + return; +} + + +/* Free the specified Tx segment. */ +void tomal_free_tx_segment(TxDescSegment* segment) +{ + TxDesc* desc; + struct sk_buff** skb; + + /* Look for any descriptors with an associated buffer. */ + for (desc = segment->desc, skb = segment->skb; + desc && desc != (TxDesc*) segment->branchDesc; desc++, skb++) { + if (*skb) { + dma_unmap_single(NULL, desc->buffHeadAddrL, + desc->postedLength, DMA_FROM_DEVICE); + dev_kfree_skb_any(*skb); + *skb = NULL; + } + desc->postedLength = 0; + } + + /* Free SKB pointer storage. */ + if (segment->skb) + kfree(segment->skb); + + /* Free the descriptor storage. */ + if (segment->desc) + dma_free_coherent(NULL, segment->size, (void*) segment->desc, segment->dmaHandle); + + /* Free the segment. */ + kfree((void*) segment); + + return; +} + + + +/* Free all Tx buffers. 
*/ +void tomal_free_tx_buffers(TOMAL* tomal, + U8 channel) +{ + TxDescSegment* segment = tomal->txDescSegment[channel]; + TxDescSegment* startSegment = segment; + TxDesc* desc; + struct sk_buff** skb; + + while (segment) { + /* Look for any descriptors with an associated buffer. */ + for (desc = segment->desc, skb = segment->skb; + desc != (TxDesc*) segment->branchDesc; desc++, skb++) { + if (*skb) { + dma_unmap_single(NULL, desc->buffHeadAddrL, + desc->postedLength, DMA_FROM_DEVICE); + dev_kfree_skb_any(*skb); + *skb = NULL; + } + + desc->postedLength = 0; + } + + segment = segment->next; + if (segment == startSegment) + break; + } + + return; +} + + + +int tomal_process_tx_buffers(TOMAL* tomal, + U8 channel, + register U32 framesToProcess) +{ + register TxDescSegment* segment = tomal->oldTxSegment[channel]; + register TxDesc* desc = &segment->desc[segment->oldIndex]; + register int skbFrag = 0; + register int rc = 0; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel); + + /* Process the non-served descriptors, starting with the oldest. */ + tomal->numberOfTransmittedFrames[channel] += framesToProcess; + while (likely(framesToProcess)) { + /* Have we reached the end of the segment? */ + if (unlikely(desc == (TxDesc*) segment->branchDesc)) { + /* Reset the oldest descriptor pointer and move the oldest segment ahead. */ + segment->oldIndex = 0; + tomal->oldTxSegment[channel] = segment = segment->next; + desc = segment->desc; + } + + /* Process the current descriptor. */ + PRINTK(DBG_TOMAL | DBG_LEVEL3, "xmit of buffer [%x] complete\n", + desc->buffHeadAddrL); + + if (likely(desc->code & TOMAL_TX_LAST)) { + /* Unmap the buffer. Free the skb. Check descriptor status. Increment the */ + /* transmitted frame count. 
*/ + dma_unmap_single(NULL, desc->buffHeadAddrL, desc->postedLength, DMA_TO_DEVICE); + dev_kfree_skb_irq(segment->skb[segment->oldIndex]); + segment->skb[segment->oldIndex] = NULL; + skbFrag = 0; + framesToProcess--; + if (unlikely(!(desc->wBStatus & TOMAL_TX_STATUS_GOOD))) + ((EMAC*) netdev_priv(tomal->netDev[channel]))->stats.tx_errors++; + } else + /* We have a fragmented skb and the first buffer is a special */ + /* case because we didn't map an entire page for it. Unmap */ + /* the buffer now. */ + if (!skbFrag) { + dma_unmap_single(NULL, desc->buffHeadAddrL, + desc->postedLength, DMA_TO_DEVICE); + skbFrag = 1; + } else + /* Unmap the page that contains the current fragment. */ + dma_unmap_page(NULL, desc->buffHeadAddrL, + desc->postedLength, DMA_TO_DEVICE); + + /* Advance to next descriptor. */ + desc++; + segment->oldIndex++; + rc++; + } + + tomal->pendingTxBuffers[channel] -= rc; + + /* Restart the TX counters. */ + out_be32(&tomal->regs[0]->txNotificationCtrl, (channel ? TOMAL_TX_NOTIFY_CTRL_COUNTER_START1 : TOMAL_TX_NOTIFY_CTRL_COUNTER_START0)); + + if (unlikely(netif_queue_stopped(tomal->netDev[channel]) && + (tomal->pendingTxBuffers[channel] + MAX_SKB_FRAGS + 1) < tomal->maxTxBuffers[channel])) + netif_wake_queue(tomal->netDev[channel]); + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + + +/* Disable IRQs. */ +void tomal_irq_disable(TOMAL* tomal, + U8 channel) +{ + /* Disable TX & RX MAC event and interrupt generation. */ + out_be32(&tomal->regs[channel]->rxMACStatusEnable, 0); + out_be32(&tomal->regs[channel]->txMACStatusEnable, 0); + out_be32(&tomal->regs[channel]->txMACStatusEnable, 0); + out_be32(&tomal->regs[channel]->txMACStatusMask, 0); + + /* Disable HW error event and interrupt generation. */ + out_be32(&tomal->regs[channel]->hwErrorsEnable, 0); + out_be32(&tomal->regs[channel]->hwErrorsMask, 0); + + /* Disable SW critical and non-critical error event and */ + /* interrupt generation. 
*/ + out_be32(&tomal->regs[channel]->swCriticalErrorsEnable, 0); + out_be32(&tomal->regs[channel]->swCriticalErrorsMask, 0); + out_be32(&tomal->regs[channel]->swNonCriticalErrorsEnable, 0); + out_be32(&tomal->regs[channel]->swNonCriticalErrorsMask, 0); + + /* Disable TX & RX event interrupts. */ + out_be32(&tomal->regs[channel]->rxEventMask, 0); + out_be32(&tomal->regs[channel]->txEventMask, 0); + + return; +} + + +/* Enable IRQs and interrupt generation mechanisms. */ +void tomal_irq_enable(TOMAL* tomal, + U8 channel) +{ + /* Enable TX & RX MAC event and interrupt generation. */ + out_be32(&tomal->regs[channel]->rxMACStatusEnable, TOMAL_RX_MAC_XEMAC_MASK); + out_be32(&tomal->regs[channel]->txMACStatusEnable, TOMAL_TX_MAC_XEMAC_MASK); + out_be32(&tomal->regs[channel]->txMACStatusEnable, TOMAL_TX_MAC_XEMAC_MASK); + out_be32(&tomal->regs[channel]->txMACStatusMask, TOMAL_TX_MAC_XEMAC_MASK); + + /* Enable HW error event and interrupt generation. */ + out_be32(&tomal->regs[channel]->hwErrorsEnable, + TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_ORAPE | + TOMAL_HW_ERRORS_IDBPE | TOMAL_HW_ERRORS_ODBPE); + out_be32(&tomal->regs[channel]->hwErrorsMask, + TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_ORAPE | + TOMAL_HW_ERRORS_IDBPE | TOMAL_HW_ERRORS_ODBPE); + + /* Enable SW critical and non-critical error event and */ + /* interrupt generation. */ + out_be32(&tomal->regs[channel]->swCriticalErrorsEnable, + TOMAL_SW_CRIT_ERRORS_TDBC | TOMAL_SW_CRIT_ERRORS_RDBC); + out_be32(&tomal->regs[channel]->swCriticalErrorsMask, + TOMAL_SW_CRIT_ERRORS_TDBC | TOMAL_SW_CRIT_ERRORS_RDBC); + out_be32(&tomal->regs[channel]->swNonCriticalErrorsEnable, + TOMAL_SW_NONCRIT_ERRORS_TPDBC | TOMAL_SW_NONCRIT_ERRORS_RTSDB); + out_be32(&tomal->regs[channel]->swNonCriticalErrorsMask, + TOMAL_SW_NONCRIT_ERRORS_TPDBC | TOMAL_SW_NONCRIT_ERRORS_RTSDB); + + /* Enable TX & RX event interrupts. 
*/ + out_be32(&tomal->regs[channel]->rxEventMask, TOMAL_RX_EVENT); + out_be32(&tomal->regs[channel]->txEventMask, TOMAL_TX_EVENT); + + /* Enable TX counters. */ + out_be32(&tomal->regs[0]->txNotificationCtrl, + (channel ? TOMAL_TX_NOTIFY_CTRL_COUNTER_START1 : + TOMAL_TX_NOTIFY_CTRL_COUNTER_START0)); + + /* Enable RX counters. */ + out_be32(&tomal->regs[0]->rxNotificationCtrl, + (channel ? TOMAL_RX_NOTIFY_CTRL_COUNTER_START1 : + TOMAL_RX_NOTIFY_CTRL_COUNTER_START0)); + + return; +} + + +/* Handle IRQs for channel 0 and any IRQs not specific to any channel. */ +static irqreturn_t tomal_irq0(int irq, + void* data) +{ + int rc = IRQ_NONE; + TOMAL* tomal = (TOMAL*) data; + EMAC* emac = (EMAC*) netdev_priv(tomal->netDev[0]); + U32 isr = in_be32(&tomal->regs[0]->interruptStatus); +#ifdef CONFIG_BGP_E10000_NAPI + int pollScheduled = 0; +#endif + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - irq=%d isr=%08x\n", irq, isr); + + if (likely(irq == tomal->irq0)) { + if (isr & TOMAL_INTERRUPT_RX0) { +#ifndef CONFIG_BGP_E10000_NAPI + int budget = tomal->maxRxBuffers[0]; +#endif + PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_RX0 - irq=%d isr=%08x\n", irq, isr); + spin_lock(&tomal->rxLock[0]); +#ifdef CONFIG_BGP_E10000_NAPI + /* Disable further Rx interrupts. */ + out_be32(&tomal->regs[0]->rxEventMask, 0); + + /* Schedule Rx processing. */ + napi_schedule(&(tomal->napi[0])) ; + pollScheduled = 1; +#endif + + /* Clear the RX interrupt. */ + out_be32(&tomal->regs[0]->rxEventStatus, TOMAL_RX_EVENT); + +#ifndef CONFIG_BGP_E10000_NAPI + /* Process the buffers then allocate new ones. */ + rc = tomal_poll(tomal->netDev[0], budget); + if (rc != 0) + printk(KERN_CRIT "Failure processing RX buffers [%d]\n", rc); +#endif + spin_unlock(&tomal->rxLock[0]); + PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_RX0 - IRQ_HANDLED\n"); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_TX0) { + spin_lock(&tomal->txLock[0]); + + /* Clear any TX interrupt. 
*/ + out_be32(&tomal->regs[0]->txEventStatus, TOMAL_TX_EVENT); + + /* Process the buffers that have been transmitted. */ + rc = tomal_process_tx_buffers(tomal, 0, + in_be32(&tomal->regs[0]->txNumberOfTransmittedFrames)-tomal->numberOfTransmittedFrames[0]); + if (rc <0) + printk(KERN_CRIT "Failure processing TX buffers [%d]\n", rc); + + spin_unlock(&tomal->txLock[0]); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_TX_MAC_ERROR0) { + U32 status = in_be32(&tomal->regs[0]->txMACStatus); + + PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_TX_MAC_ERROR0 [%08x]\n", status); + + emac->stats.tx_errors++; + + /* Clear the interrupt. */ + out_be32(&tomal->regs[0]->txMACStatus, status); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_RX_MAC_ERROR0) { + U32 status = in_be32(&tomal->regs[0]->rxMACStatus); + + PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_RX_MAC_ERROR0 [%08x]\n", status); + + emac->stats.rx_errors++; + + /* Clear the interrupt. */ + out_be32(&tomal->regs[0]->rxMACStatus, status); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0) { + U32 status = in_be32(&tomal->regs[0]->swNonCriticalErrorsStatus); +#ifndef CONFIG_BGP_E10000_NAPI + int budget = tomal->maxRxBuffers[0]; +#else + U32 swNonCriticalErrorsMask; +#endif + + if (status & TOMAL_SW_NONCRIT_ERRORS_TPDBC) { + /* Checksum failed on requested frame. */ + emac->stats.tx_errors++; + } else if (status & TOMAL_SW_NONCRIT_ERRORS_RTSDB) { + /* TOMAL has exhausted all the RX buffers. */ + U32 hwdrops = in_be32(&tomal->regs[0]->rxDroppedFramesCount); + emac->stats.rx_dropped += hwdrops; + tomal->numberOfHwDrops0 += hwdrops; + out_be32(&tomal->regs[0]->rxDroppedFramesCount, 0); + emac->stats.rx_errors++; +#ifndef CONFIG_BGP_E10000_NAPI + tomal_poll(tomal->netDev[0], budget); +#else + /* Disable too short Rx buffer interrupt and schedule Rx processing. 
*/ + swNonCriticalErrorsMask = in_be32(&tomal->regs[0]->swNonCriticalErrorsMask); + out_be32(&tomal->regs[0]->swNonCriticalErrorsMask, + swNonCriticalErrorsMask & ~TOMAL_SW_NONCRIT_ERRORS_RTSDB); + PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0 pollScheduled=%d\n",pollScheduled); + if (!pollScheduled) + napi_schedule(&(tomal->napi[0])) ; + +#endif + } + else + e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_noncrit_int, + "Unknown non-critical SW error [0x%08x].", status); + + /* Clear the interrupt. */ + out_be32(&tomal->regs[0]->swNonCriticalErrorsStatus, status); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_CRITICAL_ERROR) { + U32 swStatus = in_be32(&tomal->regs[0]->swCriticalErrorsStatus); + U32 hwStatus = in_be32(&tomal->regs[0]->hwErrorsStatus); + + PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_CRITICAL_ERROR [SW=%08x, HW=%08x]\n", + swStatus, hwStatus); + + /* Check for software errors. */ + if (swStatus & TOMAL_SW_CRIT_ERRORS_TDBC) + emac->stats.tx_errors++; + else if (swStatus & TOMAL_SW_CRIT_ERRORS_RDBC) + emac->stats.rx_errors++; + else if (swStatus) + e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_critical_int, + "Unknown critical SW error [%08x].", swStatus); + + /* Check for hardware errors. */ + if (hwStatus & (TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_IDBPE)) + emac->stats.rx_errors++; + else if (hwStatus & (TOMAL_HW_ERRORS_ORAPE | TOMAL_HW_ERRORS_ODBPE)) + emac->stats.tx_errors++; + else if (hwStatus) + e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_critical_int, + "Unknown critical HW error [%08x].", hwStatus); + + /* Clear the interrupt(s). */ + out_be32(&tomal->regs[0]->hwErrorsStatus, hwStatus); + out_be32(&tomal->regs[0]->swCriticalErrorsStatus, swStatus); + + /* Soft reset required here. 
*/ + tomal_soft_reset(tomal); + tomal_irq_enable(tomal, 0); + + rc = IRQ_HANDLED; + } + if (rc != IRQ_HANDLED) { + e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq, + "Unhandled interrupt - irq=%d, isr=0x%08x, rc=%d", + irq, isr, rc); + } + } else { + e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq, + "Spurious interrupt - irq=%d, isr=0x%08x.", + irq, isr); + } + + return rc; +} + +/* Handle interrupts for channel 0. */ +static irqreturn_t tomal_irq1(int irq, + void* data) +{ + int rc = IRQ_NONE; + TOMAL* tomal = (TOMAL*) data; + EMAC* emac = (EMAC*) netdev_priv(tomal->netDev[1]); + U32 isr = in_be32(&tomal->regs[0]->interruptStatus); +#ifdef CONFIG_BGP_E10000_NAPI + int pollScheduled = 0; +#endif + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - irq=%d isr=%08x\n", irq, isr); + + if (likely(irq == tomal->irq1)) { + if (isr & TOMAL_INTERRUPT_RX1) { +#ifndef CONFIG_BGP_E10000_NAPI + int budget = tomal->maxRxBuffers[1]; +#endif + spin_lock(&tomal->rxLock[1]); + +#ifdef CONFIG_BGP_E10000_NAPI + /* Disable further Rx interrupts. */ + out_be32(&tomal->regs[1]->rxEventMask, 0); + + /* Schedule Rx processing. */ + napi_schedule(&(tomal->napi[1])) ; + pollScheduled = 1; +#endif + + /* Clear the RX interrupt. */ + out_be32(&tomal->regs[1]->rxEventStatus, TOMAL_RX_EVENT); + +#ifndef CONFIG_BGP_E10000_NAPI + /* Process the buffers then allocate new ones. */ + rc = tomal_poll(tomal->netDev[1], budget); + if (rc != 0) + printk(KERN_CRIT "Failure processing RX buffers [%d]\n", rc); +#endif + spin_unlock(&tomal->rxLock[1]); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_TX1) { + spin_lock(&tomal->txLock[1]); + + /* Clear any TX interrupt. */ + out_be32(&tomal->regs[1]->txEventStatus, TOMAL_TX_EVENT); + + /* Process the buffers that have been transmitted. 
*/ + rc = tomal_process_tx_buffers(tomal, 1, + in_be32(&tomal->regs[1]->txNumberOfTransmittedFrames) - tomal->numberOfTransmittedFrames[1]); + if (rc < 0) + printk(KERN_CRIT "Failure processing TX buffers [%d]\n", rc); + + spin_unlock(&tomal->txLock[1]); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_TX_MAC_ERROR1) { + U32 status = in_be32(&tomal->regs[1]->txMACStatus); + + PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_TX_MAC_ERROR1 [%08x]\n", status); + + emac->stats.tx_errors++; + + /* Clear the interrupt. */ + out_be32(&tomal->regs[1]->txMACStatus, status); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_RX_MAC_ERROR1) { + U32 status = in_be32(&tomal->regs[1]->rxMACStatus); + + PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_RX_MAC_ERROR1 [%08x]\n", status); + + emac->stats.rx_errors++; + + /* Clear the interrupt. */ + out_be32(&tomal->regs[1]->rxMACStatus, status); + rc = IRQ_HANDLED; + } + if (isr & TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0) { + U32 status = in_be32(&tomal->regs[1]->swNonCriticalErrorsStatus); +#ifndef CONFIG_BGP_E10000_NAPI + int budget = tomal->maxRxBuffers[1]; +#else + U32 swNonCriticalErrorsMask; +#endif + if (status & TOMAL_SW_NONCRIT_ERRORS_TPDBC) + emac->stats.tx_errors++; + else if (status & TOMAL_SW_NONCRIT_ERRORS_RTSDB) { + /* TOMAL has exhausted all the RX buffers. */ + U32 hwdrops = in_be32(&tomal->regs[1]->rxDroppedFramesCount); + emac->stats.rx_dropped += hwdrops; + tomal->numberOfHwDrops1 += hwdrops; + out_be32(&tomal->regs[1]->rxDroppedFramesCount, 0); + emac->stats.rx_errors++; +#ifndef CONFIG_BGP_E10000_NAPI + tomal_poll(tomal->netDev[1], budget); +#else + /* Disable 'too short Rx buffer' interrupt and schedule Rx processing. 
*/ + swNonCriticalErrorsMask = in_be32(&tomal->regs[1]->swNonCriticalErrorsMask); + out_be32(&tomal->regs[1]->swNonCriticalErrorsMask, + swNonCriticalErrorsMask & ~TOMAL_SW_NONCRIT_ERRORS_RTSDB); + if (!pollScheduled) + napi_schedule(&(tomal->napi[1])) ; +#endif + } else + e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_noncrit_int, + "Unknown non-critical SW error [0x%08x].", status); + + /* Clear the interrupt. */ + out_be32(&tomal->regs[1]->swNonCriticalErrorsStatus, status); + rc = IRQ_HANDLED; + } + if (rc != IRQ_HANDLED) { + e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq, + "Unhandled interrupt - irq=%d, isr=0x%08x, rc=%d", + irq, isr, rc); + } + } else { + e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq, + "Spurious interrupt - irq=%d, isr=0x%08x.", irq, isr); + } + + return rc; +} + + +/* Configure TOMAL. */ +int tomal_configure(TOMAL* tomal) +{ + int rc = 0; + int c; + + PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "entry - tomal=%p\n", tomal); + + out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_RX_MAC0 | + TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 | + TOMAL_CFG_CTRL_TX_MAC1 | TOMAL_CFG_CTRL_PLB_FREQ_250); + out_be32(&tomal->regs[0]->consumerMemoryBaseAddr, 0); + out_be32(&tomal->regs[0]->packetDataEngineCtrl, TOMAL_PDE_CTRL_RX_PREFETCH1 | + TOMAL_PDE_CTRL_TX_PREFETCH1); /* prefetch 1 descriptor */ + out_be32(&tomal->regs[0]->interruptRoute, TOMAL_IRQ1_MASK); /* route #1 ints to TOE_PLB_INT[1] */ + for (c = 0; c < TOMAL_MAX_CHANNELS; c++) + if (tomal->netDev[c]) { + /* Allocate RX descriptors. */ + rc = tomal_alloc_rx_segments(tomal, c, tomal->maxRxBuffers[c]); + if (rc) { + /* Failure allocating requested descriptors. */ + BUG_ON(rc); + } + + /* Allocate RX buffers and initialize RX descriptor info. 
*/ + tomal->oldRxSegment[c] = tomal->rxDescSegment[c]; + + rc = tomal_alloc_rx_buffers(tomal, c); + if (rc <= 0) { + if (c && tomal->netDev[0]) + tomal_free_rx_buffers(tomal, 0); + break; + } + else + rc = 0; + + /* Allocate TX descriptors and initialize TX descriptor info. */ + rc = tomal_alloc_tx_segments(tomal, c, tomal->maxTxBuffers[c]); + if (rc) { + /* Failure allocating requested descriptors. */ + printk(KERN_CRIT "Failure allocating %d TX descriptors.\n", tomal->maxTxBuffers[c]); + BUG_ON(rc); + } + tomal->pendingTxBuffers[c] = 0; + tomal->oldTxSegment[c] = tomal->freeTxSegment[c] = tomal->txDescSegment[c]; + tomal->freeTxSegment[c]->freeIndex = tomal->freeTxSegment[c]->oldIndex = + tomal->numberOfTransmittedFrames[c] = tomal->numberOfReceivedFrames[c] = 0; + + /* Initialize the timers and counters. */ + out_be32(&tomal->regs[c]->txMinTimer, 255); + out_be32(&tomal->regs[c]->txMaxTimer, 255); + out_be32(&tomal->regs[c]->txMaxFrameNum, tomal->maxTxBuffers[c]); + out_be32(&tomal->regs[c]->txMinFrameNum, 255); + out_be32(&tomal->regs[c]->rxMinTimer, 255); + out_be32(&tomal->regs[c]->rxMaxTimer, 22); + out_be32(&tomal->regs[c]->rxMinFrameNum, 255); +#ifdef CONFIG_BGP_E10000_NAPI + out_be32(&tomal->regs[c]->rxMaxFrameNum, 4); +#else + out_be32(&tomal->regs[c]->rxMaxFrameNum, 64); +#endif + + /* Initialize spinlocks. 
*/ + spin_lock_init(&tomal->rxLock[c]); + spin_lock_init(&tomal->txLock[c]); + +#ifdef CONFIG_BGP_E10000_NAPI + netif_napi_add(tomal->netDev[c],&(tomal->napi[c]),tomal_poll_napi,tomal->maxRxBuffers[c]) ; + napi_enable(&(tomal->napi[c])) ; +#endif + } + + PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "exit - rc=%d\n", rc); + + return rc; +} + + + +TOMAL* __init +tomal_init(void* devMapAddr, + struct net_device* netDev0, + U32 rxTotalBufferSize0, + U32 numTxBuffers0, + struct net_device* netDev1, + U32 rxTotalBufferSize1, + U32 numTxBuffers1, + int irq0, + int irq1, + struct proc_dir_entry* procDir) +{ + TOMAL* tomal; + int rc = 0; + int c; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - netDev0=%p, rxTotalBufferSize0=%d, " + "numTxBuffers0=%d, netDev1=%p, rxTotalBufferSize1=%d, " + "numTxBuffers1=%d, irq0=%d, irq1=%d, procDir=%p\n", netDev0, rxTotalBufferSize0, + numTxBuffers0, netDev1, rxTotalBufferSize1, numTxBuffers1, irq0, irq1, procDir); + + /* Allocate tomal object. */ + tomal = kmalloc(sizeof(TOMAL), GFP_KERNEL); + if (!tomal) { + e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error, + "Failure allocating TOMAL device."); + rc = -ENOMEM; + goto end; + } + memset((void*) tomal, 0, sizeof(*tomal)); + + /* Map the TOMAL registers. */ + tomal->regs[0] = (TOMALRegs*) devMapAddr; + if (!tomal->regs[0]) { + e10000_printr(bg_subcomp_tomal, tomal_ras_ioremap_error, + "Failure maping TOMAL registers."); + rc = -ENXIO; + goto free_tomal; + } + + /* Setup a register mapping for the second channel. The registers that */ + /* are specific to the second channel are located 0x100 bytes past the */ + /* registers specific to the first channel. Use this mapping for */ + /* channel 1 specific registers only! */ + tomal->regs[1] = (TOMALRegs*) ((U8*) tomal->regs[0]) + 0x100; + + /* Register interrupt handlers. TOMAL has two interrupt lines. 
*/ + tomal->irq0 = irq0; + tomal->irq1 = irq1; + rc = request_irq(tomal->irq0, tomal_irq0, IRQF_DISABLED, "TOMAL IRQ0", (void*) tomal); + if (!rc) { + rc = request_irq(tomal->irq1, tomal_irq1, IRQF_DISABLED, "TOMAL IRQ1", (void*) tomal); + if (rc) { + e10000_printr(bg_subcomp_tomal, tomal_ras_irq_unavailable, + "Unable to register IRQ - irq1=0x%08x.", irq1); + free_irq(tomal->irq0, tomal); + tomal->irq0 = 0xffffffff; + goto free_irqs; + } + } else { + e10000_printr(bg_subcomp_tomal, tomal_ras_irq_unavailable, + "Unable to register IRQ - irq0=0x%08x.", irq0); + goto unmap_tomal_regs; + } + + /* Create /proc/driver/e10000/tomal directory. */ + tomal->parentDir = procDir; + if (procDir) { + tomal->tomalDir = proc_mkdir("tomal", procDir); + if (tomal->tomalDir) { + tomal->hwDir = proc_mkdir("hw", tomal->tomalDir); + if (tomal->hwDir) { + E10000_PROC_ENTRY* entry = tomal_hw_proc_entry; + + while (entry->name) { + entry->entry = e10000_create_proc_entry(tomal->hwDir, entry->name, (void*) + ((U32) entry->addr + (U32) tomal->regs[0])); + entry++; + } + } + tomal->swDir = proc_mkdir("sw", tomal->tomalDir); + if (tomal->swDir) { + tomal_sw_proc_entry[0].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[0].name, + (void*) &tomal->maxRxBuffers[0]); + tomal_sw_proc_entry[1].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[1].name, + (void*) &tomal->maxRxBuffers[1]); + tomal_sw_proc_entry[2].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[2].name, + (void*) &tomal->rxBufferSize[0]); + tomal_sw_proc_entry[3].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[3].name, + (void*) &tomal->rxBufferSize[1]); + tomal_sw_proc_entry[4].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[4].name, + (void*) &tomal->rxDescSegment[0]); + tomal_sw_proc_entry[5].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[5].name, + (void*) &tomal->rxDescSegment[1]); + 
tomal_sw_proc_entry[6].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[6].name, + (void*) &tomal->oldRxSegment[0]); + tomal_sw_proc_entry[7].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[7].name, + (void*) &tomal->oldRxSegment[1]); + tomal_sw_proc_entry[8].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[8].name, + (void*) &tomal->maxTxBuffers[0]); + tomal_sw_proc_entry[9].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[9].name, + (void*) &tomal->maxTxBuffers[1]); + tomal_sw_proc_entry[10].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[10].name, + (void*) &tomal->pendingTxBuffers[0]); + tomal_sw_proc_entry[11].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[11].name, + (void*) &tomal->pendingTxBuffers[1]); + tomal_sw_proc_entry[12].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[12].name, + (void*) &tomal->numberOfTransmittedFrames[0]); + tomal_sw_proc_entry[13].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[13].name, + (void*) &tomal->numberOfTransmittedFrames[1]); + tomal_sw_proc_entry[14].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[14].name, + (void*) &tomal->txDescSegment[0]); + tomal_sw_proc_entry[15].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[15].name, + (void*) &tomal->txDescSegment[1]); + tomal_sw_proc_entry[16].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[16].name, + (void*) &tomal->oldTxSegment[0]); + tomal_sw_proc_entry[17].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[17].name, + (void*) &tomal->oldTxSegment[1]); + tomal_sw_proc_entry[18].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[18].name, + (void*) &tomal->freeTxSegment[0]); + tomal_sw_proc_entry[19].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[19].name, + (void*) 
&tomal->freeTxSegment[1]); + tomal_sw_proc_entry[20].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[20].name, + (void*) &tomal->irq0); + tomal_sw_proc_entry[21].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[21].name, + (void*) &tomal->irq1); + tomal_sw_proc_entry[22].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[22].name, + (void*) &tomal->numberOfNetrxDrops); + tomal_sw_proc_entry[23].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[23].name, + (void*) &tomal->numberOfHwDrops0); + tomal_sw_proc_entry[24].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[24].name, + (void*) &tomal->numberOfHwDrops1); + tomal_sw_proc_entry[25].entry = + e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[25].name, + (void*) &tomal->numberOfNotLast); +/* tomal_sw_proc_entry[22].entry = */ +/* e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[22].name, */ +/* (void*) &tomal->count_tx_checksum_type[0]); */ +/* tomal_sw_proc_entry[23].entry = */ +/* e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[23].name, */ +/* (void*) &tomal->count_tx_checksum_type[1]); */ +/* tomal_sw_proc_entry[24].entry = */ +/* e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[24].name, */ +/* (void*) &tomal->count_tx_checksum_type[2]); */ +/* tomal_sw_proc_entry[25].entry = */ +/* e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[25].name, */ +/* (void*) &tomal->count_tx_checksum_type[3]); */ + } + } + } + + /* For each configured channel allocate descriptor segments and perform other initialization. */ + tomal->netDev[0] = netDev0; + if (netDev0) { + tomal->rxBufferSize[0] = 9000 + ETH_HLEN + BGP_E10000_FCS_SIZE; + tomal->maxRxBuffers[0] = (rxTotalBufferSize0 <= TOMAL_RX_TOTAL_BUFFER_SIZE_MAX ? 
rxTotalBufferSize0 : + TOMAL_RX_TOTAL_BUFFER_SIZE_MAX) / tomal->rxBufferSize[0] ; + tomal->maxTxBuffers[0] = numTxBuffers0; + } + tomal->netDev[1] = netDev1; + if (netDev1) { + tomal->rxBufferSize[1] = 9000 + ETH_HLEN + BGP_E10000_FCS_SIZE; + tomal->maxRxBuffers[1] = (rxTotalBufferSize1 <= TOMAL_RX_TOTAL_BUFFER_SIZE_MAX ? rxTotalBufferSize1 : + TOMAL_RX_TOTAL_BUFFER_SIZE_MAX) / tomal->rxBufferSize[1]; + tomal->maxTxBuffers[1] = numTxBuffers1; + } + for (c = 0; c < TOMAL_MAX_CHANNELS; c++) { + if (tomal->netDev[c]) { +#ifdef CONFIG_BGP_E10000_IP_CHECKSUM + /* Tell the network stack that TOMAL performs IP checksum and */ + /* that it can handle the transmission of scatter/gather data. */ + tomal->netDev[c]->features |= (NETIF_F_SG | NETIF_F_IP_CSUM); +#endif + tomal->netDev[c]->features |= (NETIF_F_HIGHDMA | NETIF_F_LLTX); + + } + } + tomal_soft_reset(tomal); + + goto end; + +free_irqs: + if (tomal->irq0) + free_irq(tomal->irq0, (void*) tomal); + if (tomal->irq1) + free_irq(tomal->irq1, (void*) tomal); + +unmap_tomal_regs: + tomal->regs[0] = NULL; + +free_tomal: + kfree((void*) tomal); + +end: + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return (rc ? ERR_PTR(rc) : tomal); +} + + +/* Allocate an SKB for each Rx descriptor that doesn't already reference one. */ +int tomal_alloc_rx_buffers(TOMAL* tomal, + U8 channel) +{ + int rc = 0; + RxDescSegment* segment; + RxDesc* desc; + RxDesc* startDesc; + struct sk_buff** skb; + U32 bytesAlloced = 0; + U32 buffersAlloced = 0; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p channel=%d\n", tomal, channel); + + segment = tomal->rxDescSegment[channel]; + desc = segment->desc; + startDesc = desc; + skb = segment->skb; + + /* Iterate over all descriptors and allocate a buffer to any */ + /* descriptors that don't already point to a buffer. */ + do { + /* Have we reached the end of the segment? */ + if (desc == (RxDesc*) segment->branchDesc) { + /* Move the descriptor segment pointer to the next segment. 
*/ + segment = segment->next; + desc = segment->desc; + skb = segment->skb; + if (desc == startDesc) + /* We've been through all descriptors. */ + break; + } + + /* If this descriptor is unused then allocate a buffer here. */ + if (!desc->postedLength) { + /* Allocate a buffer. */ + *skb = alloc_skb(tomal->rxBufferSize[channel] + 16, GFP_ATOMIC); + if (*skb) { + skb_reserve(*skb, 2); + + /* Point a descriptor at the buffer. */ + desc->code = TOMAL_RX_DESC_CODE; + desc->postedLength = tomal->rxBufferSize[channel]; + desc->status = 0; + desc->totalFrameLength = 0; + desc->buffHeadAddrH = 0; + desc->buffHeadAddrL = + dma_map_single(NULL, (*skb)->data, + desc->postedLength, + DMA_FROM_DEVICE); + BUG_ON(!desc->buffHeadAddrL); + + bytesAlloced += desc->postedLength; + buffersAlloced++; + } else { + e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error, + "Failure allocating SKB."); + break; + } + } + + /* Advance to the next descriptor and buffer. */ + desc++; + skb++; + } while (desc != startDesc); + + /* Now tell TOMAL about all the buffers allocated. */ + /* We can add up to 64K at a time for a maximum total of 1MB. */ + while (bytesAlloced) { + U32 size = (bytesAlloced <= 0xffff ? bytesAlloced : 0xffff); + + BUG_ON(in_be32(&tomal->regs[channel]->rxTotalBufferSize) + size > 0x100000); + out_be32(&tomal->regs[channel]->rxAddFreeBytes, size); + bytesAlloced -= size; + } + + rc = (rc ? rc : buffersAlloced); + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + +/* Receive frames until the indicated number of frames have been received or there are no more */ +/* frames available. 
*/ +#if defined(CONFIG_BGP_E10000_NAPI) +int tomal_poll_napi(struct napi_struct * napi, int budget) /* struct net_device* netDev, int* budget) */ +{ + struct net_device *netDev = napi->dev ; +#else +int tomal_poll(struct net_device *netDev, int budget) /* struct net_device* netDev, int* budget) */ +{ +#endif + int rc; + EMAC* emac = (EMAC*) netdev_priv(netDev); + TOMAL* tomal = emac->tomal; + U8 channel = emac->channel; + RxDescSegment* segment = tomal->oldRxSegment[channel]; + register RxDesc* desc = segment->currDesc; + register struct sk_buff** skb = segment->currSkb; + register const U32 buffLen = tomal->rxBufferSize[channel]; + register const U32 skbSize = buffLen + 16; + register U32 rxNumberOfReceivedFrames = in_be32(&tomal->regs[channel]->rxNumberOfReceivedFrames); + register U32 framesToProcess = rxNumberOfReceivedFrames - tomal->numberOfReceivedFrames[channel]; + register U32 framesReceived = 0; + register U32 bytesPosted = 0; + register int quota = min(budget, (int) framesToProcess); + + PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "entry - netDev=%p, budget=%d\n", netDev, budget); + +/* #ifdef CONFIG_BGP_E10000_NAPI */ +/* // Determine receive quota. */ +/* if (quota > netDev->quota) */ +/* quota = netDev->quota; */ +/* #endif */ + + /* Iterate over the RX descriptors, starting with the oldest, processing each */ + /* data buffer that has been received until the indicated number of frames */ + /* have been processed. */ + while (likely((framesReceived < quota) && framesToProcess)) { + /* Is the current descriptor describing a valid frame? */ + if (likely(desc->status & TOMAL_RX_LAST)) { + PRINTK(DBG_TOMAL | DBG_LEVEL3 | DBG_NAPI, "Received %d bytes to skb %p\n", desc->totalFrameLength, *skb); + if (likely((desc->status & TOMAL_RX_STATUS_CHECKSUM_VALID) && + (desc->status & TOMAL_RX_STATUS_IP_CHECKSUM_PASSED) && + (desc->status & TOMAL_RX_STATUS_TCP_UDP_CHECKSUM_PASSED))) + /* Valid checksum. 
*/ + (*skb)->ip_summed = CHECKSUM_UNNECESSARY; + else + (*skb)->ip_summed = CHECKSUM_NONE; + skb_put(*skb, desc->totalFrameLength); + (*skb)->dev = netDev; + (*skb)->protocol = eth_type_trans(*skb, netDev); +#ifdef CONFIG_BGP_E10000_NAPI + PRINTK(DBG_NAPI, "netif_receive_skb\n"); + rc = netif_receive_skb(*skb); +#else + rc = netif_rx(*skb); +#endif + *skb = NULL; + if (likely(rc == NET_RX_SUCCESS)) { + framesReceived++; + emac->stats.rx_bytes += desc->totalFrameLength; + } else if (rc == NET_RX_DROP || rc == NET_RX_BAD) { + emac->stats.rx_dropped++; + tomal->numberOfNetrxDrops ++ ; + } else + emac->stats.rx_errors++; + } else { + tomal->numberOfNotLast++ ; + } + + /* Make the current slot in the Rx ring useable again. */ + if (likely(*skb == NULL)) { + *skb = alloc_skb(skbSize, GFP_ATOMIC); + if (likely(*skb)) { + skb_reserve(*skb, 2); /* align */ + desc->buffHeadAddrL = dma_map_single(NULL, (*skb)->data, buffLen, DMA_FROM_DEVICE); + desc->postedLength = buffLen; + bytesPosted += desc->postedLength; + } else + desc->postedLength = desc->buffHeadAddrL = 0; + } else /* Reinitialize this descriptor */ + bytesPosted += desc->postedLength; /* descriptor avaialable again so repost */ + desc->status = 0; + + /* Post additional buffers to the device if we've accumulated enough. */ + if (unlikely(bytesPosted >= 0xffff)) { + out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0xffff); + bytesPosted -= 0xffff; + } + + skb++; + desc++; + framesToProcess--; + + /* Have we reached the end of the segment? */ + if (unlikely(desc->code != TOMAL_RX_DESC_CODE)) { + /* Move to the next segment. */ + segment->currDesc = segment->desc; + segment->currSkb = segment->skb; + tomal->oldRxSegment[channel] = segment = segment->next; + desc = segment->currDesc; + skb = segment->currSkb; + } + } + + /* Post any remaining buffers to the device. */ + if (likely(bytesPosted)) + out_be32(&tomal->regs[channel]->rxAddFreeBytes, bytesPosted); + + /* Update segment information and statistics. 
*/ + segment->currDesc = desc; + segment->currSkb = skb; + emac->stats.rx_packets += framesReceived; + tomal->numberOfReceivedFrames[channel] = rxNumberOfReceivedFrames - framesToProcess; + + /* Reset the Rx notification mechanism. */ + out_be32(&tomal->regs[0]->rxNotificationCtrl, (channel ? TOMAL_RX_NOTIFY_CTRL_COUNTER_START1 : TOMAL_RX_NOTIFY_CTRL_COUNTER_START0)); + +#ifdef CONFIG_BGP_E10000_NAPI +/* netDev->quota -= framesReceived; */ + budget -= framesReceived; + if (framesReceived == quota) { + /* We processed all frames within the specified quota. Reenable interrupts */ + /* and tell the kernel that we received everything available. */ + U32 swNonCriticalErrorsMask = in_be32(&tomal->regs[0]->swNonCriticalErrorsMask); + PRINTK(DBG_NAPI, "napi_complete\n"); + napi_complete(napi) ; + out_be32(&tomal->regs[channel]->rxEventMask, TOMAL_RX_EVENT); + if (!(swNonCriticalErrorsMask & TOMAL_SW_NONCRIT_ERRORS_RTSDB)) + out_be32(&tomal->regs[0]->swNonCriticalErrorsMask, + swNonCriticalErrorsMask | TOMAL_SW_NONCRIT_ERRORS_RTSDB); + rc = 0; + } else + rc = 1; +#else + rc = 0; +#endif + + PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "exit - rc=%d\n", rc); + + return rc; +} + +static inline U16 * frame_checksum_ptr(struct sk_buff* skb) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + unsigned int eth_proto = eth->h_proto ; + struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ; + unsigned int iphlen = 4*iph->ihl ; + struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) ); + struct udphdr *udph = (struct udphdr *) ( ((char *)(iph)) + (iphlen) ); + unsigned int ip_proto = iph->protocol ; + if( eth_proto == ETH_P_IP) { + if( ip_proto == IPPROTO_TCP) return &(tcph->check) ; + if( ip_proto == IPPROTO_UDP) return &(udph->check) ; + } + return NULL ; + +} +/* Transmit a frame. */ +/* Caller should be holding the TOMAL lock for the specified channel. 
*/ +int tomal_xmit_tx_buffer(TOMAL* tomal, + U8 channel, + struct sk_buff* skb) +{ + int rc = 0; + int nr_frags = skb_shinfo(skb)->nr_frags; + int f = -1; + TxDescSegment* segment = tomal->freeTxSegment[channel]; + U32 framesToProcess; + U32 buffLen; + dma_addr_t buffAddr; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, skb=%p, channel=%d\n", tomal, skb, channel); + + do { + /* Are we at the end of the segment? */ + if (unlikely(segment->desc[segment->freeIndex].code == 0x20)) { + segment->freeIndex = 0; + tomal->freeTxSegment[channel] = segment = segment->next; + } + + /* Point the next free descriptor(s) at the SKB buffer(s). The first buffer is a special case. */ + if (f < 0) { + /* The data is in the skb's data buffer. */ + buffLen = skb->len - skb->data_len; + buffAddr = dma_map_single(NULL, skb->data, buffLen, DMA_TO_DEVICE); +/* tomal->count_tx_checksum_type[skb->ip_summed] += 1 ; */ +#ifdef CONFIG_BGP_E10000_IP_CHECKSUM + /* When using the IO node as a router (collective --> ethernet ) frames are coming across marked CHECKSUM_COMPLETE */ + /* even though I think they should be marked CHECKSUM_PARTIAL. Use the TOMAL checksumming hardware on the frames. */ +/* if (skb->ip_summed == CHECKSUM_PARTIAL) */ + if( 1) + { + /* Generate IP checksum for this frame. 
*/ + U16 * frame_ck_ptr=frame_checksum_ptr(skb) ; + if( frame_ck_ptr ) *frame_ck_ptr = 0 ; +/* if( frame_ck_ptr && frame_ck_ptr != (U16*)(skb->head+skb->csum_start + skb->csum_offset)) */ +/* { */ +/* printk(KERN_INFO "(E) frame_ck_ptr=%p skb->head=%p skb->csum_start=%d skb->csum_offset=%d\n", */ +/* frame_ck_ptr,skb->head,skb->csum_start,skb->csum_offset) ; */ +/* } */ +/* *(U16*)(skb->head+skb->csum_start + skb->csum_offset) = 0; */ + segment->desc[segment->freeIndex].command = TOMAL_TX_ENABLE_HW_CHECKSUM | + TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD; + } else { + segment->desc[segment->freeIndex].command = TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD; + } +#else + segment->desc[segment->freeIndex].command = TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD; +#endif + + } else { + struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f]; + + /* Map the page that contains the current fragment. */ + buffAddr = dma_map_page(NULL, frag->page, frag->page_offset, frag->size, DMA_TO_DEVICE); + buffLen = frag->size; + } + + segment->desc[segment->freeIndex].wBStatus = 0; + segment->desc[segment->freeIndex].postedLength = buffLen; + segment->desc[segment->freeIndex].buffHeadAddrL = (U32) buffAddr; + segment->desc[segment->freeIndex].code = TOMAL_TX_DESC_CODE; + if (f == (nr_frags - 1)) { /* Last buffer? */ + segment->desc[segment->freeIndex].code |= TOMAL_TX_NOTIFY_REQ | TOMAL_TX_SIGNAL | TOMAL_TX_LAST; + segment->skb[segment->freeIndex] = skb; + + /* Post buffer(s) for transmission. */ + PRINTK(DBG_TOMAL | DBG_LEVEL3, "Enqueueing buffer 0x%08x for xmit, index=%d, desc=%p, len=%d, code=0x%x\n", + (U32) buffAddr, segment->freeIndex, &segment->desc[segment->freeIndex], segment->desc[segment->freeIndex].postedLength, + segment->desc[segment->freeIndex].code); + smp_wmb(); + out_be32(&tomal->regs[channel]->txAddPostedFrames, 1); + } + + /* Advance to the next free descriptor index. 
*/ + segment->freeIndex++; + f++; + } while (f < nr_frags); + tomal->pendingTxBuffers[channel] += f+1; + + /* Clean up any buffers for frames that have been transmitted. */ + framesToProcess = in_be32(&tomal->regs[channel]->txNumberOfTransmittedFrames) - tomal->numberOfTransmittedFrames[channel]; + if (unlikely(framesToProcess > 32)) { + int bufsProcessed = tomal_process_tx_buffers(tomal, channel, framesToProcess); + if (unlikely(bufsProcessed < 0)) + printk(KERN_WARNING "%s: Error processing TX buffers [%d]\n", + tomal->netDev[channel]->name, bufsProcessed); + } + + /* Stop the queue if we lack the space to transmit another frame. */ + if (unlikely((tomal->pendingTxBuffers[channel] + MAX_SKB_FRAGS + 1) > + tomal->maxTxBuffers[channel])) + netif_stop_queue(tomal->netDev[channel]); + + tomal->netDev[channel]->trans_start = jiffies; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} + + + +void tomal_exit(TOMAL* tomal) +{ + int c; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry\n"); + + if (tomal) { + /* Release interrupt handlers. */ + free_irq(TOMAL_IRQ0, tomal); + free_irq(TOMAL_IRQ1, tomal); + + /* Free descriptor segments for each channel. */ + for (c = 0; c < TOMAL_MAX_CHANNELS; c++) { + tomal_free_rx_segments(tomal, c); + tomal_free_tx_segments(tomal, c); + + /* Unregister and free net_device */ + if (tomal->netDev[c]) { + EMAC* emac = netdev_priv(tomal->netDev[c]); + + /* Allow EMAC to cleanup. */ + if (emac) + emac_exit(emac); + + unregister_netdev(tomal->netDev[c]); + free_netdev(tomal->netDev[c]); + } + } + + /* Remove /proc entries. 
*/ + if (tomal->tomalDir) { + if (tomal->hwDir) { + E10000_PROC_ENTRY* entry = tomal_hw_proc_entry; + + while (entry->name) { + if (entry->entry) { + remove_proc_entry(entry->entry->name, tomal->hwDir); + entry->entry = NULL; + } + entry++; + } + + remove_proc_entry(tomal->hwDir->name, tomal->tomalDir); + tomal->hwDir = NULL; + } + if (tomal->swDir) { + E10000_PROC_ENTRY* entry = tomal_sw_proc_entry; + while (entry->name) { + if (entry->entry) { + remove_proc_entry(entry->entry->name, tomal->swDir); + entry->entry = NULL; + } + entry++; + } + + remove_proc_entry(tomal->swDir->name, tomal->tomalDir); + tomal->swDir = NULL; + } + + remove_proc_entry(tomal->tomalDir->name, tomal->parentDir); + tomal->tomalDir = NULL; + } + + /* Free the TOMAL object. */ + kfree((void*) tomal); + } + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n"); + + return; +} + + +/* Reset and reconfigure the TOMAL hardware and reinitialize Rx descriptors. */ +int tomal_soft_reset(TOMAL* tomal) +{ + int rc = 0; + int c; + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p\n", tomal); + + /* Reset TOMAL and wait for it to finish. */ + out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_SOFT_RESET); + for (c = 100; (in_be32(&tomal->regs[0]->configurationCtrl) & TOMAL_CFG_CTRL_SOFT_RESET) && c; c--) + udelay(10000); + if (!c) { + e10000_printr(bg_subcomp_tomal, tomal_ras_timeout, + "TOMAL reset failure."); + rc = -ETIME; + } else { + /* Reset EMAC(s) and free any buffers. */ + for (c = 0; c < TOMAL_MAX_CHANNELS; c++) + if (tomal->netDev[c]) { + /* Free any RX and TX buffers. */ + tomal_free_rx_buffers(tomal, c); + tomal_free_tx_buffers(tomal, c); + + /* Free descriptor segments */ + tomal_free_rx_segments(tomal, c); + tomal_free_tx_segments(tomal, c); + } + + /* Reconfigure TOMAL. 
*/ + rc = tomal_configure(tomal); + } + + PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc); + + return rc; +} diff --git a/drivers/net/bgp_e10000/bgp_tomal.h b/drivers/net/bgp_e10000/bgp_tomal.h new file mode 100644 index 00000000000000..d45ef5813793c9 --- /dev/null +++ b/drivers/net/bgp_e10000/bgp_tomal.h @@ -0,0 +1,423 @@ +/* + * bgp_tomal.h: Definition of TOMAL device for BlueGene/P 10 GbE driver + * + * Copyright (c) 2007, 2010 International Business Machines + * Author: Andrew Tauferner <ataufer@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef _BGP_TOMAL_H +#define _BGP_TOMAL_H + +#include <asm/io.h> +#include <asm/bluegene.h> +#include <linux/skbuff.h> +#include <linux/proc_fs.h> + +#include "bgp_e10000.h" + +#define TOMAL_MAX_CHANNELS 2 + + +#define TOMAL_RX_MAX_FRAME_NUM 10 +#define TOMAL_RX_MAX_TIMER 50 + + +#define TOMAL_IRQ_GROUP 8 +#define TOMAL_IRQ0_GINT 0 +#define TOMAL_IRQ1_GINT 1 +#define TOMAL_IRQ0 bic_hw_to_irq(TOMAL_IRQ_GROUP, TOMAL_IRQ0_GINT) +#define TOMAL_IRQ1 bic_hw_to_irq(TOMAL_IRQ_GROUP, TOMAL_IRQ1_GINT) + + +#define TOMAL_BASE_ADDRESS 0x720000000ULL +typedef volatile struct _TOMALRegs { + U32 configurationCtrl; /* 0000 configuration control */ +#define TOMAL_CFG_CTRL_RX_MAC0 0x00800000 +#define TOMAL_CFG_CTRL_RX_MAC1 0x00400000 +#define TOMAL_CFG_CTRL_TX_MAC0 0x00200000 +#define TOMAL_CFG_CTRL_TX_MAC1 0x00100000 +#define TOMAL_CFG_CTRL_PLB_FREQ_250 0x00000000 +#define TOMAL_CFG_CTRL_PLB_FREQ_300 0x00040000 +#define TOMAL_CFG_CTRL_PLB_FREQ_350 0x00080000 +#define TOMAL_CFG_CTRL_PLB_FREQ_400 0x000c0000 +#define TOMAL_CFG_CTRL_PLB_M_POWER 0x00000080 +#define TOMAL_CFG_CTRL_SLEEP 0x00000002 +#define TOMAL_CFG_CTRL_SOFT_RESET 0x00000001 + U32 reserved1[23]; /* 0004 */ + U32 revisionID; /* 0060 revision 
id */ + U32 reserved2[103]; /* 0064 */ + U32 consumerMemoryBaseAddr; /* 0200 consumer memory base address */ + U32 reserved3[127]; /* 0204 */ + U32 packetDataEngineCtrl; /* 0400 packet data engine control */ +#define TOMAL_PDE_CTRL_RX_PREFETCH8 0x00000030 +#define TOMAL_PDE_CTRL_RX_PREFETCH1 0x00000000 +#define TOMAL_PDE_CTRL_TX_PREFETCH8 0x00000003 +#define TOMAL_PDE_CTRL_TX_PREFETCH1 0x00000000 + U32 reserved4[127]; /* 0404 */ + U32 txNotificationCtrl; /* 0600 TX notification control */ +#define TOMAL_TX_NOTIFY_CTRL_COUNTER_START0 0x00000020 +#define TOMAL_TX_NOTIFY_CTRL_COUNTER_START1 0x00000010 + U32 reserved5[3]; /* 0604 */ + U32 txMinTimer; /* 0610 TX min timer */ + U32 reserved6[3]; /* 0614 */ + U32 txMaxTimer; /* 0620 TX max timer */ + U32 reserved7[11]; /* 0624 */ + U32 txFramePerServiceCtrl; /* 0650 TX frame / service control */ + U32 reserved8[3]; /* 0654 */ + U32 txHWCurrentDescriptorAddrH; /* 0660 TX HW current desc. addr. High */ + U32 reserved9[3]; /* 0664 */ + U32 txHWCurrentDescriptorAddrL; /* 0670 TX HW current desc. addr. 
Low */ + U32 reserved10[7]; /* 0674 */ + U32 txPendingFrameCount; /* 0690 TX pending frame count */ +#define TOMAL_MAX_TX_PENDING_FRAMES 216 + U32 reserved11[3]; /* 0694 */ + U32 txAddPostedFrames; /* 06A0 TX add posted frames */ + U32 reserved12[3]; /* 06A4 */ + U32 txNumberOfTransmittedFrames; /* 06B0 TX number transmitted frames */ + U32 reserved13[3]; /* 06B4 */ + U32 txMaxFrameNum; /* 06C0 TX max frame number */ + U32 reserved14[3]; /* 06C4 */ + U32 txMinFrameNum; /* 06D0 TX min frame number */ + U32 reserved15[3]; /* 06D4 */ + U32 txEventStatus; /* 06E0 TX event status */ +#define TOMAL_TX_EVENT 0x00000001 + U32 reserved16[3]; /* 06E4 */ + U32 txEventMask; /* 06F0 TX event mask */ + U32 reserved17[515]; /* 06F4 */ + U32 rxNotificationCtrl; /* 0F00 RX notification control */ +#define TOMAL_RX_NOTIFY_CTRL_COUNTER_START0 0x00000080 +#define TOMAL_RX_NOTIFY_CTRL_COUNTER_START1 0x00000040 + U32 reserved18[3]; /* 0F04 */ + U32 rxMinTimer; /* 0F10 RX minimum timer */ + U32 reserved19[3]; /* 0F14 */ + U32 rxMaxTimer; /* 0F20 RX maximum timer */ + U32 reserved20[63]; /* 0F24 */ + U32 rxHWCurrentDescriptorAddrH; /* 1020 RX HW current desc. addr. High */ + U32 reserved21[3]; /* 1024 */ + U32 rxHWCurrentDescriptorAddrL; /* 1030 RX HW current desc. addr. 
Low */ + U32 reserved22[3]; /* 1034 */ + U32 rxAddFreeBytes; /* 1040 num bytes in RX buffers posted */ + U32 reserved23[3]; /* 1044 */ + U32 rxTotalBufferSize; /* 1050 total size of buffers */ +#define TOMAL_RX_TOTAL_BUFFER_SIZE_MAX 0x00100000 + U32 reserved24[3]; /* 1054 */ + U32 rxNumberOfReceivedFrames; /* 1060 total frames received */ + U32 reserved25[3]; /* 1064 */ + U32 rxDroppedFramesCount; /* 1070 total frames dropped */ + U32 reserved26[3]; /* 1074 */ + U32 rxMaxFrameNum; /* 1080 num frames RX to interrupt */ + U32 reserved27[3]; /* 1084 */ + U32 rxMinFrameNum; /* 1090 num frames RX to int w/timer */ + U32 reserved28[3]; /* 1094 */ + U32 rxEventStatus; /* 10A0 RX status of */ +#define TOMAL_RX_EVENT 0x00000001 + U32 reserved29[3]; /* 10A4 */ + U32 rxEventMask; /* 10B0 RX event mask of */ + U32 reserved30[467]; /* 10B4 */ + U32 swNonCriticalErrorsStatus; /* 1800 software noncritical error status */ +#define TOMAL_SW_NONCRIT_ERRORS_TPDBC 0x00000010 +#define TOMAL_SW_NONCRIT_ERRORS_RTSDB 0x00000001 + U32 reserved31[3]; /* 1804 */ + U32 swNonCriticalErrorsEnable; /* 1810 software noncritical error enable */ + U32 reserved32[3]; /* 1814 */ + U32 swNonCriticalErrorsMask; /* 1820 software noncritical error mask */ + U32 reserved33[55]; /* 1824 */ + U32 rxDataBufferFreeSpace; /* 1900 number free entries in RX buffer */ + U32 reserved34[3]; /* 1904 */ + U32 txDataBuffer0FreeSpace; /* 1910 num free entries in TX buffer */ + U32 reserved35[3]; /* 1914 */ + U32 txDataBuffer1FreeSpace; /* 1920 num free entries in TX buffer */ + U32 reserved36[127]; /* 1924 */ + U32 rxMACStatus; /* 1B20 status from MAC for RX packets */ +#define TOMAL_RX_MAC_CODE_ERROR 0x00001000 /* XEMAC */ +#define TOMAL_RX_MAC_PARITY_ERROR 0x00000400 /* XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_OVERRUN 0x00000200 /* XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_PAUSE_FRAME 0x00000100 /* XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_BAD_FRAME 0x00000080 /* XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_RUNT_FRAME 0x00000040 /* 
XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_SHORT_EVENT 0x00000020 /* EMAC4 */ +#define TOMAL_RX_MAC_ALIGN_ERROR 0x00000010 /* EMAC4 */ +#define TOMAL_RX_MAC_BAD_FCS 0x00000008 /* XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_FRAME_TOO_LONG 0x00000004 /* XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_OUT_RANGE_ERROR 0x00000002 /* XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_IN_RANGE_ERROR 0x00000001 /* XEMAC/EMAC4 */ +#define TOMAL_RX_MAC_XEMAC_MASK (TOMAL_RX_MAC_CODE_ERROR | \ + TOMAL_RX_MAC_PARITY_ERROR | TOMAL_RX_MAC_OVERRUN | \ + TOMAL_RX_MAC_PAUSE_FRAME | TOMAL_RX_MAC_BAD_FRAME | \ + TOMAL_RX_MAC_RUNT_FRAME | TOMAL_RX_MAC_BAD_FCS | \ + TOMAL_RX_MAC_FRAME_TOO_LONG | TOMAL_RX_MAC_OUT_RANGE_ERROR | \ + TOMAL_RX_MAC_IN_RANGE_ERROR) + U32 reserved37[3]; /* 1B24 */ + U32 rxMACStatusEnable; /* 1B30 enable bits in rxMACStatus */ + U32 reserved38[3]; /* 1B34 */ + U32 rxMACStatusMask; /* 1B40 mask bits in rxMACStatus */ + U32 reserved39[3]; /* 1B44 */ + U32 txMACStatus; /* 1B50 status from MAC for TX packets */ +#define TOMAL_TX_MAC_LOCAL_FAULT 0x00001000 /* XEMAC */ +#define TOMAL_TX_MAC_REMOTE_FAULT 0x00000800 /* XEMAC */ +#define TOMAL_TX_MAC_BAD_FCS 0x00000200 /* EMAC4 */ +#define TOMAL_TX_MAC_PARITY_ERROR 0x00000100 /* XEMAC */ +#define TOMAL_TX_MAC_LOST_CARRIER 0x00000080 /* EMAC4 */ +#define TOMAL_TX_MAC_EXCESSIVE_DEFERRAL 0x00000040 /* EMAC4 */ +#define TOMAL_TX_MAC_EXCESSIVE_COLLISION 0x00000020 /* EMAC4 */ +#define TOMAL_TX_MAC_LATE_COLLISION 0x00000010 /* EMAC4 */ +#define TOMAL_TX_MAC_UNDERRUN 0x00000002 /* XEMAC/EMAC4 */ +#define TOMAL_TX_MAC_SQE 0x00000001 /* EMAC4 */ +#define TOMAL_TX_MAC_XEMAC_MASK (TOMAL_TX_MAC_LOCAL_FAULT | \ + TOMAL_TX_MAC_REMOTE_FAULT | TOMAL_TX_MAC_PARITY_ERROR | \ + TOMAL_TX_MAC_UNDERRUN) + U32 reserved40[3]; /* 1B54 */ + U32 txMACStatusEnable; /* 1B60 enable bits in txMACStatus */ + U32 reserved41[3]; /* 1B64 */ + U32 txMACStatusMask; /* 1B70 mask bits in txMACStatus */ + U32 reserved42[163]; /* 1B74 */ + U32 hwErrorsStatus; /* 1E00 hardware error status */ 
+#define TOMAL_HW_ERRORS_IRAPE 0x00000008 +#define TOMAL_HW_ERRORS_ORAPE 0x00000004 +#define TOMAL_HW_ERRORS_IDBPE 0x00000002 +#define TOMAL_HW_ERRORS_ODBPE 0x00000001 + U32 reserved43[3]; /* 1E04 */ + U32 hwErrorsEnable; /* 1E10 enable bits in hwErrorsStatus */ + U32 reserved44[3]; /* 1E14 */ + U32 hwErrorsMask; /* 1E20 mask bits in hwErrorsStatus */ + U32 reserved45[55]; /* 1E24 */ + U32 swCriticalErrorsStatus; /* 1F00 software critical error status */ +#define TOMAL_SW_CRIT_ERRORS_TDBC 0x00000002 +#define TOMAL_SW_CRIT_ERRORS_RDBC 0x00000001 + U32 reserved46[3]; /* 1F04 */ + U32 swCriticalErrorsEnable; /* 1F10 enable bits in swCriticalErrorsStatus */ + U32 reserved47[3]; /* 1F14 */ + U32 swCriticalErrorsMask; /* 1F20 mask bits in swCriticalErrorsStatus */ + U32 reserved48[3]; /* 1F24 */ + U32 rxDescriptorBadCodeFEC; /* 1F30 RX channel w/bad code descriptor */ + U32 reserved49[3]; /* 1F34 */ + U32 txDescriptorBadCodeFEC; /* 1F40 TX channel w/bad code descriptor */ + U32 reserved50[15]; /* 1F44 */ + U32 interruptStatus; /* 1F80 interrupt status register */ +#define TOMAL_INTERRUPT_TX1 0x00020000 +#define TOMAL_INTERRUPT_TX0 0x00010000 +#define TOMAL_INTERRUPT_RX1 0x00000200 +#define TOMAL_INTERRUPT_RX0 0x00000100 +#define TOMAL_INTERRUPT_TX_MAC_ERROR1 0x00000080 +#define TOMAL_INTERRUPT_TX_MAC_ERROR0 0x00000040 +#define TOMAL_INTERRUPT_RX_MAC_ERROR1 0x00000020 +#define TOMAL_INTERRUPT_RX_MAC_ERROR0 0x00000010 +#define TOMAL_INTERRUPT_PLB_PARITY_ERROR 0x00000008 +#define TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR1 0x00000004 +#define TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0 0x00000002 +#define TOMAL_INTERRUPT_CRITICAL_ERROR 0x00000001 +#define TOMAL_IRQ0_MASK (TOMAL_INTERRUPT_TX0 | TOMAL_INTERRUPT_RX0 | \ + TOMAL_INTERRUPT_TX_MAC_ERROR0 | TOMAL_INTERRUPT_RX_MAC_ERROR0 | \ + TOMAL_INTERRUPT_PLB_PARITY_ERROR | TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0 | \ + TOMAL_INTERRUPT_CRITICAL_ERROR) +#define TOMAL_IRQ1_MASK (TOMAL_INTERRUPT_TX1 | TOMAL_INTERRUPT_RX1 | \ + 
TOMAL_INTERRUPT_TX_MAC_ERROR1 | TOMAL_INTERRUPT_RX_MAC_ERROR1 | \ + TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR1) + U32 reserved51[3]; /* 1F84 */ + U32 interruptRoute; /* 1F90 interrupt line routing */ + U32 reserved52[51]; /* 1F94 */ + U32 rxMACBadStatusCounter; /* 2060 num frames with errors in MAC */ + U32 reserved53[999]; /* 2064 */ + U32 debugVectorsCtrl; /* 3000 */ + U32 reserved54[3]; /* 3004 */ + U32 debugVectorsReadData; /* 3010 */ +} TOMALRegs; + +typedef volatile struct _RxDesc { + U16 code; +#define TOMAL_RX_DESC_CODE 0x6000 + U16 postedLength; + U16 status; +#define TOMAL_RX_LAST 0x8000 +#define TOMAL_RX_STATUS_ENCODE_MASK 0x03f0 +#define TOMAL_RX_STATUS_TCP_UDP_CHECKSUM_PASSED 0x0008 +#define TOMAL_RX_STATUS_IP_CHECKSUM_PASSED 0x0004 +#define TOMAL_RX_STATUS_CHECKSUM_VALID 0x0002 + U16 totalFrameLength; + U16 reserved; + U16 buffHeadAddrH; /* bits 16-31 of data buffer address */ + U32 buffHeadAddrL; /* bits 32-63 of data buffer address */ +} RxDesc; + + +typedef volatile struct _TxDesc { + U8 code; +#define TOMAL_TX_DESC_CODE 0x60 +#define TOMAL_TX_SIGNAL 0x04 +#define TOMAL_TX_NOTIFY_REQ 0x02 +#define TOMAL_TX_LAST 0x01 + U8 command; +#define TOMAL_TX_ENABLE_HW_CHECKSUM 0x40 +#define TOMAL_TX_GENERATE_FCS 0x20 +#define TOMAL_TX_GENERATE_PAD 0x30 /* GENERATE_FCS must also be set */ +#define TOMAL_TX_INSERT_SOURCE_ADDR 0x08 +#define TOMAL_TX_REPLACE_SOURCE_ADDR 0x04 +#define TOMAL_TX_INSERT_VLAN_TAG 0x02 +#define TOMAL_TX_REPLACE_VLAN_TAG 0x01 + U16 postedLength; + U32 wBStatus; +#define TOMAL_TX_STATUS_GOOD 0x00010000 + U16 reserved; + U16 buffHeadAddrH; /* bits 16-31 of data buffer address */ + U32 buffHeadAddrL; /* bits 32-63 of data buffer address */ +} TxDesc; + + +typedef volatile struct _BranchDesc { + U64 code; +#define TOMAL_BRANCH_CODE 0x2000000000000000ULL + U16 reserved; + U16 nextDescAddrH; /* bits 16-31 of next descriptor address */ + U32 nextDescAddrL; /* bits 32-63 of next descriptor address (16 byte aligned) */ +} BranchDesc; + + + +typedef 
struct _RxDescSegment { + RxDesc* desc; + RxDesc* currDesc; + struct sk_buff** skb; + struct sk_buff** currSkb; + dma_addr_t dmaHandle; + size_t size; + BranchDesc* branchDesc; + struct _RxDescSegment* next; +} RxDescSegment; + + +typedef struct _TxDescSegment { + TxDesc* desc; + U32 oldIndex; + U32 freeIndex; + struct sk_buff** skb; + dma_addr_t dmaHandle; + size_t size; + BranchDesc* branchDesc; + struct _TxDescSegment* next; +} TxDescSegment; + + +typedef struct _TOMAL { + /* Mapping of TOMAL's HW registers. */ + TOMALRegs* regs[TOMAL_MAX_CHANNELS]; + + /* RX buffers, descriptors, and other data. */ + U32 maxRxBuffers[TOMAL_MAX_CHANNELS]; + U16 rxBufferSize[TOMAL_MAX_CHANNELS]; + RxDescSegment* rxDescSegment[TOMAL_MAX_CHANNELS]; + RxDescSegment* oldRxSegment[TOMAL_MAX_CHANNELS]; /* oldest non-served RX desc segment */ + + /* TX descriptors and other data. */ + U32 maxTxBuffers[TOMAL_MAX_CHANNELS]; + U32 pendingTxBuffers[TOMAL_MAX_CHANNELS]; + U32 numberOfTransmittedFrames[TOMAL_MAX_CHANNELS]; + U32 numberOfReceivedFrames[TOMAL_MAX_CHANNELS]; + TxDescSegment* txDescSegment[TOMAL_MAX_CHANNELS]; + TxDescSegment* oldTxSegment[TOMAL_MAX_CHANNELS]; /* oldest non-served TX desc segment */ + TxDescSegment* freeTxSegment[TOMAL_MAX_CHANNELS]; /* next free TX descriptor segment */ + + struct net_device* netDev[TOMAL_MAX_CHANNELS]; + spinlock_t rxLock[TOMAL_MAX_CHANNELS]; + spinlock_t txLock[TOMAL_MAX_CHANNELS]; + struct napi_struct napi[TOMAL_MAX_CHANNELS] ; /* 2.6.27-ism for NAPI poll */ + int irq0; + int irq1; + int count_tx_checksum_type[4] ; + struct proc_dir_entry* parentDir; + struct proc_dir_entry* tomalDir; + struct proc_dir_entry* hwDir; + struct proc_dir_entry* swDir; + U32 numberOfNetrxDrops ; + U32 numberOfHwDrops0 ; + U32 numberOfHwDrops1 ; + U32 numberOfNotLast ; + +} TOMAL; + + + +typedef enum { + tomal_ras_none = 0x00, + tomal_ras_timeout = 0x01, + tomal_ras_alloc_error = 0x02, + tomal_ras_spurious_irq = 0x03, + tomal_ras_unknown_critical_int = 0x04, + 
tomal_ras_unknown_noncrit_int = 0x05, + tomal_ras_ioremap_error = 0x06, + tomal_ras_irq_unavailable = 0x07, + + tomal_ras_max = 0xff +} tomal_ras_id; + + +TOMAL* __init tomal_init(void* devMapAddr, + struct net_device* netDev0, + U32 rxTotalBufferSize0, + U32 numTxBuffers0, + struct net_device* netDev1, + U32 rxTotalBufferSize1, + U32 numTxBuffers1, + int irq0, + int irq1, + struct proc_dir_entry* procDir); + +int tomal_xmit_tx_buffer(TOMAL* tomal, U8 channel, struct sk_buff* skb); +int tomal_alloc_rx_buffers(TOMAL* tomal, U8 channel); +int tomal_free_rx_buffers(TOMAL* tomal, U8 channel); +#if defined(CONFIG_BGP_E10000_NAPI) +int tomal_poll_napi(struct napi_struct * napi, int budget); +#else +int tomal_poll(struct net_device *netDev, int budget); +#endif +int tomal_process_tx_buffers(TOMAL* tomal, U8 channel, U32 txNumTransmitDesc); +void tomal_free_rx_segments(TOMAL* tomal, U8 channel); +void tomal_free_tx_segments(TOMAL* tomal, U8 channel); +void tomal_free_tx_buffers(TOMAL* tomal, U8 channel); +int tomal_alloc_rx_segments(TOMAL* tomal, U8 channel, U32 numDescriptors); +int tomal_alloc_tx_segments(TOMAL* tomal, U8 channel, U32 numDescriptors); + +int tomal_soft_reset(TOMAL* tomal); +int tomal_configure(TOMAL* tomal); + + +/* Turns all RX & TX channels off. */ +static inline void tomal_rx_tx_disable(TOMAL* tomal) +{ + U32 ccr = in_be32(&tomal->regs[0]->configurationCtrl); + + ccr &= ~(TOMAL_CFG_CTRL_RX_MAC0 | TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 | + TOMAL_CFG_CTRL_TX_MAC1); + out_be32(&tomal->regs[0]->configurationCtrl, ccr); + + return; +} + + +/* Turns all RX & TX channels on. 
*/ +static inline void tomal_rx_tx_enable(TOMAL* tomal) +{ + out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_RX_MAC0 | + TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 | TOMAL_CFG_CTRL_TX_MAC1); + + return; +} + +void tomal_irq_enable(TOMAL* tomal, U8 channel); + + +void tomal_irq_disable(TOMAL* tomal, U8 channel); + + +int tomal_pending_rx_buffers(TOMAL* tomal, U8 channel); +int tomal_pending_tx_buffers(TOMAL* tomal, U8 channel); + +void tomal_exit(TOMAL* tomal); + + +#endif diff --git a/drivers/net/bgp_network/450_tlb.h b/drivers/net/bgp_network/450_tlb.h new file mode 100644 index 00000000000000..67f04c963c3b20 --- /dev/null +++ b/drivers/net/bgp_network/450_tlb.h @@ -0,0 +1,121 @@ +/* Basic access functions for 'software TLBs' in powerpc 440/450 */ +#ifndef __450_tlb_h__ +#define __450_tlb_h__ +#include <asm/bluegene_ras.h> + +static inline int get_tlb_pageid(int tlbindex) + { + int rc ; + /* PPC44x_TLB_PAGEID is 0 */ + asm volatile( "tlbre %[rc],%[index],0" + : [rc] "=r" (rc) + : [index] "r" (tlbindex) + ) ; + return rc ; + } + +static inline int get_tlb_xlat(int tlbindex) + { + int rc ; + /* PPC44x_TLB_XLAT is 1 */ + asm volatile( "tlbre %[rc],%[index],1" + : [rc] "=r" (rc) + : [index] "r" (tlbindex) + ) ; + return rc ; + } + +static inline int get_tlb_attrib(int tlbindex) + { + int rc ; + /* PPC44x_TLB_ATTRIB is 2 */ + asm volatile( "tlbre %[rc],%[index],2" + : [rc] "=r" (rc) + : [index] "r" (tlbindex) + ) ; + return rc ; + } + +static inline int search_tlb(unsigned int vaddr) + { + int rc ; + /* PPC44x_TLB_ATTRIB is 2 */ + asm volatile( "tlbsx %[rc],0,%[vaddr]" + : [rc] "=r" (rc) + : [vaddr] "r" (vaddr) + ) ; + return rc ; + } + +//static inline int search_tlb_validity(unsigned int vaddr) +//{ +// int validity ; +// asm volatile( "tlbsx. 
%[validity],0,%[vaddr]" "\n" +// "mfcr %[validity]" +// : +// [validity] "=r" (validity) +// : [vaddr] "r" (vaddr) +// : "cc" +// ) ; +// return validity ; +//} + + +static inline int search_tlb_v(unsigned int vaddr) + { + int rc ; + int tlbindex ; + int validity ; + /* PPC44x_TLB_ATTRIB is 2 */ + asm volatile( "tlbsx. %[tlbindex],0,%[vaddr]" "\n" + "mfcr %[validity]" + : [tlbindex] "=r" (tlbindex), + [validity] "=r" (validity) + : [vaddr] "r" (vaddr) + : "cc" + ) ; +// tlbindex = search_tlb(vaddr) ; +// validity=search_tlb_validity(vaddr) ; + rc = (validity & 0x20000000) | (tlbindex & 0xefffffff) ; // Hi bit for 'found', other bits (bottom 6, really) for index +// TRACEN(k_t_request,"vaddr=0x%08x tlbindex=0x%08x validity=0x%08x rc=0x%08x",vaddr,tlbindex,validity,rc) ; + return rc ; + } + +#define TLB0_EPN_1K(a) ((a)&0xFFFFFC00) /* EA[ 0:21] */ +#define TLB0_V _BN(22) /* Valid Bit */ +#define TLB0_TS _BN(23) /* Translation Address Space */ +#define TLB0_SIZE(x) _B4(27,x) /* Page Size */ +#define TLB1_ERPN(e) _B4(31,e) /* Extended RPN: 4 MSb's of 36b Physical Address */ +#define TLB1_RPN_1K(p) ((p)&0xFFFFFC00) /* RPN[ 0:21] */ + +#define TLB2_FAR _BN(10) /* Fixed Address Region */ +#define TLB2_WL1 _BN(11) /* Write-Thru L1 (when CCR1[L2COBE]=1) */ +#define TLB2_IL1I _BN(12) /* Inhibit L1-I caching (when CCR1[L2COBE]=1) */ +#define TLB2_IL1D _BN(13) /* Inhibit L1-D caching (when CCR1[L2COBE]=1) */ +#define TLB2_IL2I _BN(14) /* see below (on normal C450: Inhibit L2-I caching (when CCR1[L2COBE]=1) */ +#define TLB2_IL2D _BN(15) /* see below (on normal C450: Inhibit L2-D caching (when CCR1[L2COBE]=1) */ +#define TLB2_U0 _BN(16) /* see below (undefined/available on normal C450 */ +#define TLB2_U1 _BN(17) /* User 1: L1 Transient Enable */ +#define TLB2_U2 _BN(18) /* User 2: L1 Store WithOut Allocate #define TLB2_U3 _BN(19) // see below (on normal C450: User 3: L3 Prefetch Inhibit (0=Enabled, 1=Inhibited) */ +#define TLB2_U3 _BN(19) /* see below (on normal C450: User 3: L3 
Prefetch Inhibit (0=Enabled, 1=Inhibited) */ +#define TLB2_W _BN(20) /* Write-Thru=1, Write-Back=0 */ +#define TLB2_I _BN(21) /* Cache-Inhibited=1, Cacheable=0 */ +#define TLB2_M _BN(22) /* Memory Coherence Required */ +#define TLB2_G _BN(23) /* Guarded */ +#define TLB2_E _BN(24) /* Endian: 0=Big, 1=Little */ +#define TLB2_UX _BN(26) /* User Execute Enable */ +#define TLB2_UW _BN(27) /* User Write Enable */ +#define TLB2_UR _BN(28) /* User Read Enable */ +#define TLB2_SX _BN(29) /* Supervisor Execute Enable */ +#define TLB2_SW _BN(30) /* Supervisor Write Enable */ +#define TLB2_SR _BN(31) /* Supervisor Read Enable */ + +/* BGP Specific controls */ +#define TLB2_IL3I (TLB2_IL2I) /* L3 Inhibit for Instruction Fetches */ +#define TLB2_IL3D (TLB2_IL2D) /* L3 Inhibit for Data Accesses */ +#define TLB2_IL2 (TLB2_U0) /* U0 is L2 Prefetch Inhibit */ +#define TLB2_T (TLB2_U1) /* U1 Transient Enabled is supported. */ +#define TLB2_SWOA (TLB2_U2) /* U2 Store WithOut Allocate is supported. */ +#define TLB2_L2_PF_OPT (TLB2_U3) /* U3 is L2 Optimiztic Prefetch ("Automatic" when 0) */ + +#endif diff --git a/drivers/net/bgp_network/bgdiagnose.h b/drivers/net/bgp_network/bgdiagnose.h new file mode 100644 index 00000000000000..be205219da263b --- /dev/null +++ b/drivers/net/bgp_network/bgdiagnose.h @@ -0,0 +1,183 @@ +/* + * bgdiagnose.h + * + * Diagnostic routines for 450/BGP bringup + * + */ +#ifndef __DRIVERS__NET__BLUEGENE__BGDIAGNOSE_H__ +#define __DRIVERS__NET__BLUEGENE__BGDIAGNOSE_H__ +/* #include <asm/bluegene.h> */ + +#include <linux/kernel.h> +/* #include <asm/bgp_personality.h> */ +#include <asm/bluegene_ras.h> +#include "450_tlb.h" + +/* static BGP_Personality_t* bgp_personality ; */ + +/* static void show_personality_kernel(BGP_Personality_Kernel_t * Kernel_Config) */ +/* { */ +/* printk(KERN_INFO "show_personality_kernel L1Config=0x%08x L2Config=0x%08x L3Config=0x%08x L3Select=0x%08x FreqMHz=%d NodeConfig=0x%08x\n", */ +/* Kernel_Config->L1Config, */ +/* 
Kernel_Config->L2Config, */ +/* Kernel_Config->L3Config, */ +/* Kernel_Config->L3Select, */ +/* Kernel_Config->FreqMHz, */ +/* Kernel_Config->NodeConfig) ; */ +/* */ +/* } */ +/* static void show_personality(void) */ +/* { */ +/* // bgp_personality = bgcns()->getPersonalityData(); */ +/* // show_personality_kernel(&bgp_personality->Kernel_Config) ; */ +/* } */ + +static const char* TLB_SIZES[] = { + " 1K", /* 0 */ + " 4K", + " 16K", + " 64K", + "256K", + " 1M", + "?-6?", + " 16M", + "?-8?", + "256M", + " 1G", + "?11?", + "?12?", + "?13?", + "?14?", + "?15?" +}; + +#include "450_tlb.h" + + +static void show_tlbs(unsigned int vaddr) __attribute__ ((unused)) ; +static void show_tlbs(unsigned int vaddr) { + + int i; + uint32_t t0, t1, t2; + int tlb_index = search_tlb(vaddr) ; + for (i = 0; i < 64; i++) { + t0 = get_tlb_pageid(i) ; + t1 = get_tlb_xlat(i) ; + t2 = get_tlb_attrib(i) ; +/* _bgp_mftlb(i,t0,t1,t2); */ +/* if (t0 & TLB0_V) { */ + { + printk(KERN_INFO + "TLB 0x%02x %08x-%08x-%08x EPN=%08x RPN=%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d\n", + i, + t0, t1, t2, + TLB0_EPN_1K(t0), + TLB1_ERPN(t1),TLB1_RPN_1K(t1), + TLB_SIZES[(t0 & 0xF0) >> 4], + (t2 & TLB2_W) ? 1 : 0, + (t2 & TLB2_I) ? 1 : 0, + (t2 & TLB2_M) ? 1 : 0, + (t2 & TLB2_G) ? 1 : 0, + (t2 & TLB2_U0) ? 1 : 0, + (t2 & TLB2_U1) ? 1 : 0, + (t2 & TLB2_U2) ? 1 : 0, + (t2 & TLB2_U3) ? 1 : 0, + (t0 & TLB0_V) ? 1 : 0 + ); + } + } + printk(KERN_INFO "vaddr=0x%08x tlb_index=%d\n", vaddr,tlb_index) ; +} + +static void show_tlb_for_vaddr(unsigned int vaddr) __attribute__ ((unused)) ; +static void show_tlb_for_vaddr(unsigned int vaddr) +{ + int i = search_tlb(vaddr) & 0x3f ; + uint32_t t0 = get_tlb_pageid(i) ; + uint32_t t1 = get_tlb_xlat(i) ; + uint32_t t2 = get_tlb_attrib(i) ; + printk(KERN_INFO + "TLB 0x%02x %08x-%08x-%08x EPN=%08x RPN=%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d\n", + i, + t0, t1, t2, + TLB0_EPN_1K(t0), + TLB1_ERPN(t1),TLB1_RPN_1K(t1), + TLB_SIZES[(t0 & 0xF0) >> 4], + (t2 & TLB2_W) ? 
1 : 0, + (t2 & TLB2_I) ? 1 : 0, + (t2 & TLB2_M) ? 1 : 0, + (t2 & TLB2_G) ? 1 : 0, + (t2 & TLB2_U0) ? 1 : 0, + (t2 & TLB2_U1) ? 1 : 0, + (t2 & TLB2_U2) ? 1 : 0, + (t2 & TLB2_U3) ? 1 : 0, + (t0 & TLB0_V) ? 1 : 0 + ); + +} +static inline unsigned int move_from_spr(unsigned int sprNum) + { + unsigned long sprVal = 0; + + asm volatile ("mfspr %0,%1\n" : "=r"(sprVal) : "i" (sprNum)); + + return sprVal; + + } +static inline void show_spr(unsigned int spr, const char *name) + { + printk(KERN_INFO "%s[%03x] = 0x%08x\n",name,spr, move_from_spr(spr)) ; + } + +static inline unsigned int move_from_dcr(unsigned int dcrNum) +{ + unsigned long dcrVal = 0; + + asm volatile("mfdcrx %0,%1": "=r" (dcrVal) : "r" (dcrNum) : "memory"); + + return dcrVal; +} + +static inline unsigned int move_from_msr(void) +{ + unsigned long msrVal = 0; + + asm volatile("mfmsr %0" : "=r" (msrVal) : : "memory"); + + return msrVal; +} + +static inline void show_msr(void) + { + printk(KERN_INFO "MSR = 0x%08x\n",move_from_msr()) ; + } + +static void show_dcr_range(unsigned int start, unsigned int length) __attribute__ ((unused)) ; +static void show_dcr_range(unsigned int start, unsigned int length) + { + unsigned int x ; + for( x=0;x<length;x+=8 ) + { + unsigned int dcrx=start+x ; + printk(KERN_INFO "dcr[%04x]=[%08x %08x %08x %08x %08x %08x %08x %08x]\n", + start+x, + move_from_dcr(dcrx),move_from_dcr(dcrx+1),move_from_dcr(dcrx+2),move_from_dcr(dcrx+3), + move_from_dcr(dcrx+4),move_from_dcr(dcrx+5),move_from_dcr(dcrx+6),move_from_dcr(dcrx+7) + ) ; + } + } +static void show_sprs(void) __attribute__ ((unused)) ; +static void show_sprs(void) +{ + show_msr() ; + show_spr(0x3b3,"CCR0") ; + show_spr(0x378,"CCR1") ; + show_spr(0x3b2,"MMUCR") ; + show_spr(0x39b,"RSTCFG") ; +/* show_dcr_range(0x500,32) ; // _BGP_DCR_L30 */ +/* show_dcr_range(0x540,32) ; // _BGP_DCR_L31 */ +/* show_dcr_range(0xd00,16) ; // _BGP_DCR_DMA */ + + } + +#endif diff --git a/drivers/net/bgp_network/bgp_net_traceflags.h 
b/drivers/net/bgp_network/bgp_net_traceflags.h new file mode 100644 index 00000000000000..1a148f2064c3a5 --- /dev/null +++ b/drivers/net/bgp_network/bgp_net_traceflags.h @@ -0,0 +1,56 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: Blue Gene low-level driver for collective and torus + * + * + ********************************************************************/ +#ifndef __BGP_NET_TRACEFLAGS_H__ +#define __BGP_NET_TRACEFLAGS_H__ + +enum { + k_t_general = 0x01 , + k_t_lowvol = 0x02 , + k_t_irqflow = 0x04 , + k_t_irqflow_rcv = 0x08 , + k_t_protocol = 0x10 , + k_t_detail = 0x20 , + k_t_fifocontents = 0x40 , + k_t_toruspkt = 0x80 , + k_t_bgcolpkt = 0x80 , + k_t_init = 0x100 , + k_t_request = 0x200 , + k_t_error = 0x400 , + k_t_sync = 0x800 , + k_t_api = 0x1000 , + k_t_diagnosis = 0x2000 , + k_t_congestion = 0x4000 , + k_t_startxmit = 0x8000 , + k_t_napi = 0x10000 , + k_t_scattergather = 0x20000 , + k_t_flowcontrol = 0x40000 , + k_t_entryexit = 0x80000 , + k_t_dmacopy = 0x100000 , + k_t_fpucopy = 0x200000 , + k_t_sgdiag = 0x400000 , + k_t_sgdiag_detail = 0x800000 , + k_t_inject_detail = 0x1000000 , +}; + +#endif diff --git a/drivers/net/bgp_statistics/Makefile b/drivers/net/bgp_statistics/Makefile 
new file mode 100644 index 00000000000000..666c9b9cdd631b --- /dev/null +++ b/drivers/net/bgp_statistics/Makefile @@ -0,0 +1,4 @@ +# Makefile for BlueGene collective and torus driver + + +obj-$(CONFIG_BGP_STATISTICS) += bgp_stats.o diff --git a/drivers/net/bgp_statistics/bgp_stats.c b/drivers/net/bgp_statistics/bgp_stats.c new file mode 100644 index 00000000000000..e180cb500a238e --- /dev/null +++ b/drivers/net/bgp_statistics/bgp_stats.c @@ -0,0 +1,258 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * + * Description: Statistic collection for Blue Gene low-level driver for sockets over torus + * + * + ********************************************************************/ +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/syscalls.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> + +#include <linux/cpu.h> +#include <linux/cpuset.h> +#include <linux/bootmem.h> + +#include <linux/alignment_histograms.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/time.h> +#include <linux/vmalloc.h> + +#include <linux/dma-mapping.h> + +#include <net/inet_connection_sock.h> +#include <net/inet_sock.h> +#include <net/inet_hashtables.h> +#include <net/tcp.h> + + +static int bgp_statistics_init (void); +static void bgp_statistics_cleanup (void); + +module_init(bgp_statistics_init); +module_exit(bgp_statistics_cleanup); + + +MODULE_DESCRIPTION("BG/P statistics driver"); +MODULE_LICENSE("GPL"); + +#ifndef CTL_UNNUMBERED +#define CTL_UNNUMBERED -2 +#endif + +/* Parameters, statistics, and debugging */ +#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM) +struct alignment_histogram al_histogram ; +#endif + +static struct ctl_path bgp_statistics_ctl_path[] = { + { .procname = "bgp", .ctl_name = 0, }, + { .procname = "statistics", .ctl_name = 0, }, +/* { .procname = "torus", .ctl_name = 0, }, */ + { }, +}; + +#define CTL_PARAM_EXT(Name,Var) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .data = 
&(Var), \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_dointvec \ + } + +#define CTL_PARAM_EXT_LL(Name,Var) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .data = &(Var), \ + .maxlen = 2*sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_dointvec \ + } + + +struct ctl_table bgp_statistics_table[] = { +#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM) + CTL_PARAM_EXT("ah_min",al_histogram.min_size_of_interest) , + CTL_PARAM_EXT("sah0",AL_HISTOGRAM(src_alignment_histogram_crc,0)) , + CTL_PARAM_EXT("sah1",AL_HISTOGRAM(src_alignment_histogram_crc,1)) , + CTL_PARAM_EXT("sah2",AL_HISTOGRAM(src_alignment_histogram_crc,2)) , + CTL_PARAM_EXT("sah3",AL_HISTOGRAM(src_alignment_histogram_crc,3)) , + CTL_PARAM_EXT("sah4",AL_HISTOGRAM(src_alignment_histogram_crc,4)) , + CTL_PARAM_EXT("sah5",AL_HISTOGRAM(src_alignment_histogram_crc,5)) , + CTL_PARAM_EXT("sah6",AL_HISTOGRAM(src_alignment_histogram_crc,6)) , + CTL_PARAM_EXT("sah7",AL_HISTOGRAM(src_alignment_histogram_crc,7)) , + CTL_PARAM_EXT("sah8",AL_HISTOGRAM(src_alignment_histogram_crc,8)) , + CTL_PARAM_EXT("sah9",AL_HISTOGRAM(src_alignment_histogram_crc,9)) , + CTL_PARAM_EXT("saha",AL_HISTOGRAM(src_alignment_histogram_crc,10)) , + CTL_PARAM_EXT("sahb",AL_HISTOGRAM(src_alignment_histogram_crc,11)) , + CTL_PARAM_EXT("sahc",AL_HISTOGRAM(src_alignment_histogram_crc,12)) , + CTL_PARAM_EXT("sahd",AL_HISTOGRAM(src_alignment_histogram_crc,13)) , + CTL_PARAM_EXT("sahe",AL_HISTOGRAM(src_alignment_histogram_crc,14)) , + CTL_PARAM_EXT("sahf",AL_HISTOGRAM(src_alignment_histogram_crc,15)) , + CTL_PARAM_EXT("dah0",AL_HISTOGRAM(dst_alignment_histogram_crc,0)) , + CTL_PARAM_EXT("dah1",AL_HISTOGRAM(dst_alignment_histogram_crc,1)) , + CTL_PARAM_EXT("dah2",AL_HISTOGRAM(dst_alignment_histogram_crc,2)) , + CTL_PARAM_EXT("dah3",AL_HISTOGRAM(dst_alignment_histogram_crc,3)) , + CTL_PARAM_EXT("dah4",AL_HISTOGRAM(dst_alignment_histogram_crc,4)) , + 
CTL_PARAM_EXT("dah5",AL_HISTOGRAM(dst_alignment_histogram_crc,5)) , + CTL_PARAM_EXT("dah6",AL_HISTOGRAM(dst_alignment_histogram_crc,6)) , + CTL_PARAM_EXT("dah7",AL_HISTOGRAM(dst_alignment_histogram_crc,7)) , + CTL_PARAM_EXT("dah8",AL_HISTOGRAM(dst_alignment_histogram_crc,8)) , + CTL_PARAM_EXT("dah9",AL_HISTOGRAM(dst_alignment_histogram_crc,9)) , + CTL_PARAM_EXT("daha",AL_HISTOGRAM(dst_alignment_histogram_crc,10)) , + CTL_PARAM_EXT("dahb",AL_HISTOGRAM(dst_alignment_histogram_crc,11)) , + CTL_PARAM_EXT("dahc",AL_HISTOGRAM(dst_alignment_histogram_crc,12)) , + CTL_PARAM_EXT("dahd",AL_HISTOGRAM(dst_alignment_histogram_crc,13)) , + CTL_PARAM_EXT("dahe",AL_HISTOGRAM(dst_alignment_histogram_crc,14)) , + CTL_PARAM_EXT("dahf",AL_HISTOGRAM(dst_alignment_histogram_crc,15)) , + CTL_PARAM_EXT("rah0",AL_HISTOGRAM(rel_alignment_histogram_crc,0)) , + CTL_PARAM_EXT("rah1",AL_HISTOGRAM(rel_alignment_histogram_crc,1)) , + CTL_PARAM_EXT("rah2",AL_HISTOGRAM(rel_alignment_histogram_crc,2)) , + CTL_PARAM_EXT("rah3",AL_HISTOGRAM(rel_alignment_histogram_crc,3)) , + CTL_PARAM_EXT("rah4",AL_HISTOGRAM(rel_alignment_histogram_crc,4)) , + CTL_PARAM_EXT("rah5",AL_HISTOGRAM(rel_alignment_histogram_crc,5)) , + CTL_PARAM_EXT("rah6",AL_HISTOGRAM(rel_alignment_histogram_crc,6)) , + CTL_PARAM_EXT("rah7",AL_HISTOGRAM(rel_alignment_histogram_crc,7)) , + CTL_PARAM_EXT("rah8",AL_HISTOGRAM(rel_alignment_histogram_crc,8)) , + CTL_PARAM_EXT("rah9",AL_HISTOGRAM(rel_alignment_histogram_crc,9)) , + CTL_PARAM_EXT("raha",AL_HISTOGRAM(rel_alignment_histogram_crc,10)) , + CTL_PARAM_EXT("rahb",AL_HISTOGRAM(rel_alignment_histogram_crc,11)) , + CTL_PARAM_EXT("rahc",AL_HISTOGRAM(rel_alignment_histogram_crc,12)) , + CTL_PARAM_EXT("rahd",AL_HISTOGRAM(rel_alignment_histogram_crc,13)) , + CTL_PARAM_EXT("rahe",AL_HISTOGRAM(rel_alignment_histogram_crc,14)) , + CTL_PARAM_EXT("rahf",AL_HISTOGRAM(rel_alignment_histogram_crc,15)) , + CTL_PARAM_EXT("scah0",AL_HISTOGRAM(src_alignment_histogram_copy,0)) , + 
CTL_PARAM_EXT("scah1",AL_HISTOGRAM(src_alignment_histogram_copy,1)) , + CTL_PARAM_EXT("scah2",AL_HISTOGRAM(src_alignment_histogram_copy,2)) , + CTL_PARAM_EXT("scah3",AL_HISTOGRAM(src_alignment_histogram_copy,3)) , + CTL_PARAM_EXT("scah4",AL_HISTOGRAM(src_alignment_histogram_copy,4)) , + CTL_PARAM_EXT("scah5",AL_HISTOGRAM(src_alignment_histogram_copy,5)) , + CTL_PARAM_EXT("scah6",AL_HISTOGRAM(src_alignment_histogram_copy,6)) , + CTL_PARAM_EXT("scah7",AL_HISTOGRAM(src_alignment_histogram_copy,7)) , + CTL_PARAM_EXT("scah8",AL_HISTOGRAM(src_alignment_histogram_copy,8)) , + CTL_PARAM_EXT("scah9",AL_HISTOGRAM(src_alignment_histogram_copy,9)) , + CTL_PARAM_EXT("scaha",AL_HISTOGRAM(src_alignment_histogram_copy,10)) , + CTL_PARAM_EXT("scahb",AL_HISTOGRAM(src_alignment_histogram_copy,11)) , + CTL_PARAM_EXT("scahc",AL_HISTOGRAM(src_alignment_histogram_copy,12)) , + CTL_PARAM_EXT("scahd",AL_HISTOGRAM(src_alignment_histogram_copy,13)) , + CTL_PARAM_EXT("scahe",AL_HISTOGRAM(src_alignment_histogram_copy,14)) , + CTL_PARAM_EXT("scahf",AL_HISTOGRAM(src_alignment_histogram_copy,15)) , + CTL_PARAM_EXT("dcah0",AL_HISTOGRAM(dst_alignment_histogram_copy,0)) , + CTL_PARAM_EXT("dcah1",AL_HISTOGRAM(dst_alignment_histogram_copy,1)) , + CTL_PARAM_EXT("dcah2",AL_HISTOGRAM(dst_alignment_histogram_copy,2)) , + CTL_PARAM_EXT("dcah3",AL_HISTOGRAM(dst_alignment_histogram_copy,3)) , + CTL_PARAM_EXT("dcah4",AL_HISTOGRAM(dst_alignment_histogram_copy,4)) , + CTL_PARAM_EXT("dcah5",AL_HISTOGRAM(dst_alignment_histogram_copy,5)) , + CTL_PARAM_EXT("dcah6",AL_HISTOGRAM(dst_alignment_histogram_copy,6)) , + CTL_PARAM_EXT("dcah7",AL_HISTOGRAM(dst_alignment_histogram_copy,7)) , + CTL_PARAM_EXT("dcah8",AL_HISTOGRAM(dst_alignment_histogram_copy,8)) , + CTL_PARAM_EXT("dcah9",AL_HISTOGRAM(dst_alignment_histogram_copy,9)) , + CTL_PARAM_EXT("dcaha",AL_HISTOGRAM(dst_alignment_histogram_copy,10)) , + CTL_PARAM_EXT("dcahb",AL_HISTOGRAM(dst_alignment_histogram_copy,11)) , + 
CTL_PARAM_EXT("dcahc",AL_HISTOGRAM(dst_alignment_histogram_copy,12)) , + CTL_PARAM_EXT("dcahd",AL_HISTOGRAM(dst_alignment_histogram_copy,13)) , + CTL_PARAM_EXT("dcahe",AL_HISTOGRAM(dst_alignment_histogram_copy,14)) , + CTL_PARAM_EXT("dcahf",AL_HISTOGRAM(dst_alignment_histogram_copy,15)) , + CTL_PARAM_EXT("rcah0",AL_HISTOGRAM(rel_alignment_histogram_copy,0)) , + CTL_PARAM_EXT("rcah1",AL_HISTOGRAM(rel_alignment_histogram_copy,1)) , + CTL_PARAM_EXT("rcah2",AL_HISTOGRAM(rel_alignment_histogram_copy,2)) , + CTL_PARAM_EXT("rcah3",AL_HISTOGRAM(rel_alignment_histogram_copy,3)) , + CTL_PARAM_EXT("rcah4",AL_HISTOGRAM(rel_alignment_histogram_copy,4)) , + CTL_PARAM_EXT("rcah5",AL_HISTOGRAM(rel_alignment_histogram_copy,5)) , + CTL_PARAM_EXT("rcah6",AL_HISTOGRAM(rel_alignment_histogram_copy,6)) , + CTL_PARAM_EXT("rcah7",AL_HISTOGRAM(rel_alignment_histogram_copy,7)) , + CTL_PARAM_EXT("rcah8",AL_HISTOGRAM(rel_alignment_histogram_copy,8)) , + CTL_PARAM_EXT("rcah9",AL_HISTOGRAM(rel_alignment_histogram_copy,9)) , + CTL_PARAM_EXT("rcaha",AL_HISTOGRAM(rel_alignment_histogram_copy,10)) , + CTL_PARAM_EXT("rcahb",AL_HISTOGRAM(rel_alignment_histogram_copy,11)) , + CTL_PARAM_EXT("rcahc",AL_HISTOGRAM(rel_alignment_histogram_copy,12)) , + CTL_PARAM_EXT("rcahd",AL_HISTOGRAM(rel_alignment_histogram_copy,13)) , + CTL_PARAM_EXT("rcahe",AL_HISTOGRAM(rel_alignment_histogram_copy,14)) , + CTL_PARAM_EXT("rcahf",AL_HISTOGRAM(rel_alignment_histogram_copy,15)) , + CTL_PARAM_EXT("tagh0",AL_HISTOGRAM(tagged,0)) , + CTL_PARAM_EXT("tagh1",AL_HISTOGRAM(tagged,1)) , + CTL_PARAM_EXT("tagh2",AL_HISTOGRAM(tagged,2)) , + CTL_PARAM_EXT("tagh3",AL_HISTOGRAM(tagged,3)) , + CTL_PARAM_EXT("tagh4",AL_HISTOGRAM(tagged,4)) , + CTL_PARAM_EXT("tagh5",AL_HISTOGRAM(tagged,5)) , + CTL_PARAM_EXT("tagh6",AL_HISTOGRAM(tagged,6)) , + CTL_PARAM_EXT("tagh7",AL_HISTOGRAM(tagged,7)) , + CTL_PARAM_EXT("tagh8",AL_HISTOGRAM(tagged,8)) , + CTL_PARAM_EXT("tagh9",AL_HISTOGRAM(tagged,9)) , + CTL_PARAM_EXT("tagha",AL_HISTOGRAM(tagged,10)) , 
+ CTL_PARAM_EXT("taghb",AL_HISTOGRAM(tagged,11)) , + CTL_PARAM_EXT("taghc",AL_HISTOGRAM(tagged,12)) , + CTL_PARAM_EXT("taghd",AL_HISTOGRAM(tagged,13)) , + CTL_PARAM_EXT("taghe",AL_HISTOGRAM(tagged,14)) , + CTL_PARAM_EXT("taghf",AL_HISTOGRAM(tagged,15)) , + CTL_PARAM_EXT_LL("qcopy",al_histogram.qcopybytes) , + CTL_PARAM_EXT_LL("copy",al_histogram.copybytes) , + CTL_PARAM_EXT_LL("copyshort",al_histogram.copybytesshort) , + CTL_PARAM_EXT_LL("copymisalign",al_histogram.copybytesmisalign) , + CTL_PARAM_EXT_LL("copybroke",al_histogram.copybytesbroke) , + CTL_PARAM_EXT_LL("crcb",al_histogram.crcbytes) , + CTL_PARAM_EXT_LL("csumpartial",al_histogram.csumpartialbytes) , +#endif + { 0 }, +}; + + + +static void register_statistics_sysctl(void) +{ + register_sysctl_paths(bgp_statistics_ctl_path,bgp_statistics_table) ; +} +static int bgp_statistics_init(void) + { + register_statistics_sysctl() ; + return 0 ; + } + +static void bgp_statistics_cleanup (void) +{ + +} diff --git a/drivers/net/bgp_torus/Makefile b/drivers/net/bgp_torus/Makefile new file mode 100644 index 00000000000000..4ed8b2021bb3f3 --- /dev/null +++ b/drivers/net/bgp_torus/Makefile @@ -0,0 +1,8 @@ +# Makefile for BlueGene collective and torus driver + +EXTRA_CFLAGS += -I$(BGPHOME)/bgp/arch/include -Iarch/powerpc/syslib/bgdd/ -Iarch/ppc/syslib/bgdd/ -g -dA -D__LINUX_KERNEL__ + +bgp_torus-y := bgp_fpu_memcpy.o bgp_dma_tcp_frames.o bgp_dma_tcp.o bgtornic.o torus.o bgp_dma_tcp_diagnose.o bgp_dma_ioctl.o +bgp_torus-$(CONFIG_BLUEGENE_DMA_MEMCPY) += bgp_dma_memcpy.o + +obj-$(CONFIG_BGP_TORUS) += bgp_torus.o diff --git a/drivers/net/bgp_torus/bgp_bic_diagnosis.h b/drivers/net/bgp_torus/bgp_bic_diagnosis.h new file mode 100644 index 00000000000000..4ac45edfba4474 --- /dev/null +++ b/drivers/net/bgp_torus/bgp_bic_diagnosis.h @@ -0,0 +1,75 @@ +/* These are defined by the hardware. 
*/ +#define NR_BIC_GROUPS 15 +#define NR_BIC_GINTS 32 +#define NR_BIC_CPUS 4 + +/* 4-bit target value for target register */ +#define BIC_TARGET_MASK (0xf) +#define BIC_TARGET_TYPE_NORMAL (1<<2) +#define BIC_TARGET_NORMAL(cpu) (BIC_TARGET_TYPE_NORMAL|(cpu)) +#define BIC_DEFAULT_CPU 0 + +/* Define the layout of each group's registers. + * This layout should be 0x80 bytes long (including pad). + */ +struct bic_group_regs { + uint32_t status; /* 0x00 RW */ + uint32_t rd_clr_status; /* 0x04 RO */ + uint32_t status_clr; /* 0x08 WO */ + uint32_t status_set; /* 0x0c WO */ + uint32_t target[4]; /* 0x10 RW */ + uint32_t normal[NR_BIC_CPUS]; /* 0x20 RW */ + uint32_t critical[NR_BIC_CPUS]; /* 0x30 RW */ + uint32_t mcheck[NR_BIC_CPUS]; /* 0x40 RW */ + uint32_t _pad[12]; /* 0x50 */ +}; + +/* Define the layout of the interrupt controller mem mapped regs. */ +struct bic_regs { + struct bic_group_regs group[NR_BIC_GROUPS]; /* 0x000 */ + uint32_t hier_normal[NR_BIC_CPUS]; /* 0x780 */ + uint32_t hier_critical[NR_BIC_CPUS]; /* 0x790 */ + uint32_t hier_mcheck[NR_BIC_CPUS]; /* 0x7a0 */ +}; + +struct bic { + spinlock_t mask_lock; /* could be finer grained if necessary */ + struct bic_regs *regs; +} ; + +extern volatile struct bic bic; + +/* void show_bic_regs(void) ; // diagnostic 'printk' of the BIC */ +static void show_bic_group(int g, volatile struct bic_group_regs* gp) __attribute__ ((unused)) ; +static void show_bic_group(int g, volatile struct bic_group_regs* gp) +{ + printk(KERN_NOTICE "bic_group_regs[%d] status=%08x target=[%08x %08x %08x %08x]\n",g,gp->status, gp->target[0], gp->target[1], gp->target[2], gp->target[3]) ; + printk(KERN_NOTICE "bic_group_regs[%d] normal=[%08x %08x %08x %08x] critical=[%08x %08x %08x %08x] mcheck=[%08x %08x %08x %08x]\n",g, gp->normal[0], gp->normal[1], gp->normal[2], gp->normal[3], gp->critical[0],gp->critical[1],gp->critical[2],gp->critical[3],gp->mcheck[0],gp->mcheck[1],gp->mcheck[2],gp->mcheck[3]) ; +} + +static void show_bic_regs(void) 
__attribute__ ((unused)) ; +static void show_bic_regs(void) +{ + struct bic_regs * bic_regs = bic.regs ; + int g ; + for( g = 0 ; g < NR_BIC_GROUPS ; g += 1 ) + { + show_bic_group(g,bic_regs->group+g) ; + } + printk(KERN_NOTICE "BIC hier_normal=%08x %08x %08x %08x\n", + bic_regs->hier_normal[0], + bic_regs->hier_normal[1], + bic_regs->hier_normal[2], + bic_regs->hier_normal[3]) ; + printk(KERN_NOTICE "BIC hier_critical=%08x %08x %08x %08x\n", + bic_regs->hier_critical[0], + bic_regs->hier_critical[1], + bic_regs->hier_critical[2], + bic_regs->hier_critical[3]) ; + printk(KERN_NOTICE "BIC hier_mcheck=%08x %08x %08x %08x\n", + bic_regs->hier_mcheck[0], + bic_regs->hier_mcheck[1], + bic_regs->hier_mcheck[2], + bic_regs->hier_mcheck[3]) ; + +} diff --git a/drivers/net/bgp_torus/bgp_dma_ioctl.c b/drivers/net/bgp_torus/bgp_dma_ioctl.c new file mode 100644 index 00000000000000..0873a360156cb4 --- /dev/null +++ b/drivers/net/bgp_torus/bgp_dma_ioctl.c @@ -0,0 +1,677 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: Blue Gene low-level driver for sockets over torus + * 'ioctl' and 'procfs' support + * + ********************************************************************/ +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/syscalls.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> + +#include <linux/cpu.h> +#include <linux/cpuset.h> +#include <linux/bootmem.h> + + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/time.h> +#include <linux/vmalloc.h> + +#include <linux/dma-mapping.h> + +#include <net/inet_connection_sock.h> +#include <net/inet_sock.h> +#include <net/inet_hashtables.h> +#include <net/tcp.h> +#include <net/tcp_hiatus.h> + +#include <spi/linux_kernel_spi.h> + +#include "bgp_dma_tcp.h" + +#include "bgp_bic_diagnosis.h" +#include "../bgp_network/bgdiagnose.h" + +/* #define TRUST_TORUS_CRC */ + +#define SEND_SHORT_FRAMES_INLINE +#define ENABLE_TUNING + +#define ENABLE_LEARNING_ADDRESSES + +#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI) +/* Select operation with linux 'dev->poll' */ +#define TORNIC_DEV_POLL + +/* #if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC) */ +/* #define TORNIC_STEAL_POLL_CORE */ +/* #endif */ + +#endif + +#if defined(CONFIG_TCP_CONGESTION_OVERRIDES) +extern int sysctl_tcp_force_nodelay ; +extern int sysctl_tcp_permit_cwnd ; +extern int sysctl_tcp_max_cwnd ; +#endif + +int sysctl_bgp_torus_backlog_floor ; +int 
bgp_dma_sockproto ; /* Used elsewhere to control whether we try accelerated sockets */ + +extern int bgtornic_driverparm ; /* Parametrisation for bringup of 'tornic' device */ + +static int proc_dodcr(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) ; + +static int proc_dodcr_c8b(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) ; + +static int proc_dodcr(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + int rc ; + TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ; + dma_tcp_state.tuning_recfifo_threshold=mfdcrx(0xd3a) ; + rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ; + mtdcrx(0xd3a,dma_tcp_state.tuning_recfifo_threshold) ; + TRACE("(<)") ; + return rc ; + } + +static int proc_dodcr_c8b(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) + { + int rc ; + dumptorusdcrs() ; + TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ; + dma_tcp_state.tuning_dcr_c8b=mfdcrx(0xc8b) ; + rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ; + mtdcrx(0xc8b,dma_tcp_state.tuning_dcr_c8b) ; + TRACE("(<)") ; + return rc ; + } + + + +static struct ctl_path bgp_torus_ctl_path[] = { + { .procname = "bgp", .ctl_name = 0, }, + { .procname = "torus", .ctl_name = 0, }, + { }, +}; + +#define CTL_PARAM(Name,Var) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .data = &dma_tcp_state.Var , \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_dointvec \ + } + +#define CTL_PARAM_DCR(Name,Var) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .data = &dma_tcp_state.Var , \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_dodcr \ + } + +#define CTL_PARAM_DCR_C8B(Name,Var) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .data = &dma_tcp_state.Var , \ + .maxlen = sizeof(int), \ + 
.mode = 0644, \ + .proc_handler = &proc_dodcr_c8b \ + } + +#define CTL_PARAM_HWFIFO(Name,Var) \ + { \ + .ctl_name = CTL_UNNUMBERED, \ + .procname = Name , \ + .data = &dma_tcp_state.Var , \ + .maxlen = sizeof(int), \ + .mode = 0644, \ + .proc_handler = &proc_dohwfifo \ + } + +struct ctl_table bgp_dma_table[] = { +#if defined(USE_SKB_TO_SKB) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "dma_rec_counters", + .data = bgp_dma_tcp_counter_copies, + .maxlen = DMA_NUM_COUNTERS_PER_GROUP*sizeof(int), + .mode = 0644, + .proc_handler = &proc_do_dma_rec_counters + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "flow_counter", + .data = dma_tcp_state.flow_counter, + .maxlen = k_flow_counters*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +#endif + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tracemask", + .data = &bgp_dma_tcp_tracemask, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "backlog_floor", + .data = &sysctl_bgp_torus_backlog_floor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sockproto", + .data = &bgp_dma_sockproto, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ethem", + .data = &bgp_dma_ethem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tornic_driverparm", + .data = &bgtornic_driverparm, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +/* { */ +/* .ctl_name = CTL_UNNUMBERED, */ +/* .procname = "tornic_count", */ +/* .data = &bgp_tornic_count, */ +/* .maxlen = sizeof(int), */ +/* .mode = 0644, */ +/* .proc_handler = &proc_dointvec */ +/* }, */ + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tx_by_core", + .data = dma_tcp_state.tx_by_core, + .maxlen = 4*sizeof(int), + .mode = 0644, + 
.proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tx_in_use_count", + .data = dma_tcp_state.tx_in_use_count, + .maxlen = (k_injecting_directions+1)*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +#if defined(TRACK_LIFETIME_IN_FIFO) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_lifetime_by_direction", + .data = max_lifetime_by_direction, + .maxlen = (k_injecting_directions)*sizeof(unsigned long long), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +#endif + CTL_PARAM("bluegene_tcp_is_built",bluegene_tcp_is_built) , + CTL_PARAM("count_no_skbuff",count_no_skbuff) , +#if defined(USE_SKB_TO_SKB) + CTL_PARAM("eager_limit",eager_limit) , +#endif +#if defined(CONFIG_BGP_STATISTICS) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "reception_fifo_histogram", + .data = reception_fifo_histogram, + .maxlen = 33*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "reception_fifo_histogram", + .data = reception_fifo_histogram, + .maxlen = 33*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "reception_hi_watermark", + .data = &reception_hi_watermark, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rtt_histogram", + .data = rtt_histogram, + .maxlen = 33*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "transit_histogram", + .data = transit_histogram, + .maxlen = 33*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "resequence_histogram", + .data = dma_tcp_state.resequence_histogram, + .maxlen = k_concurrent_receives*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "bytes_sent", + .data = &dma_tcp_state.bytes_sent, + 
.maxlen = sizeof(unsigned long long), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "bytes_received", + .data = &dma_tcp_state.bytes_received, + .maxlen = sizeof(unsigned long long), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +#endif + +#if defined(CONFIG_TCP_HIATUS_COUNTS) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tcp_hiatus_counts", + .data = tcp_hiatus_counts, + .maxlen = k_tcp_hiatus_reasons*sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tcp_force_nodelay", + .data = &sysctl_tcp_force_nodelay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tcp_permit_cwnd", + .data = &sysctl_tcp_permit_cwnd, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tcp_max_cwnd", + .data = &sysctl_tcp_max_cwnd, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, +#endif + +#if defined(ENABLE_TUNING) + CTL_PARAM("tuning_num_packets",tuning_num_packets) , + CTL_PARAM("tuning_num_empty_passes",tuning_num_empty_passes) , + CTL_PARAM("tuning_non_empty_poll_delay",tuning_non_empty_poll_delay) , + CTL_PARAM("tuning_poll_after_enabling",tuning_poll_after_enabling) , + CTL_PARAM("tuning_run_handler_on_hwi",tuning_run_handler_on_hwi) , + CTL_PARAM("tuning_clearthresh_slih",tuning_clearthresh_slih) , + CTL_PARAM("tuning_clearthresh_flih",tuning_clearthresh_flih) , + CTL_PARAM("tuning_disable_in_dcr",tuning_disable_in_dcr) , + + CTL_PARAM("tuning_injection_hashmask",tuning_injection_hashmask) , + + CTL_PARAM_DCR("tuning_recfifo_threshold",tuning_recfifo_threshold) , + + CTL_PARAM("tuning_exploit_reversepropose",tuning_exploit_reversepropose) , + CTL_PARAM("tuning_counters_per_source",tuning_counters_per_source) , + 
CTL_PARAM("tuning_defer_skb_until_counter",tuning_defer_skb_until_counter) , + CTL_PARAM("tuning_deliver_eagerly",tuning_deliver_eagerly) , + CTL_PARAM("tuning_diagnose_rst",tuning_diagnose_rst) , + CTL_PARAM("tuning_select_fifo_algorithm",tuning_select_fifo_algorithm) , + CTL_PARAM("tuning_min_icsk_timeout",tuning_min_icsk_timeout) , + CTL_PARAM("tuning_virtual_channel",tuning_virtual_channel) , + + CTL_PARAM_DCR_C8B("tuning_dcr_c8b",tuning_dcr_c8b) , +#endif +#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS) + { + .ctl_name = CTL_UNNUMBERED, + .procname = "tcp_scattergather_frag_limit", + .data = &tcp_scattergather_frag_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, +#endif + +#if defined(KEEP_TCP_FLAG_STATS) + CTL_PARAM("tcp_count_fin",tcp_received_flag_count[7]) , + CTL_PARAM("tcp_count_syn",tcp_received_flag_count[6]) , + CTL_PARAM("tcp_count_rst",tcp_received_flag_count[5]) , + CTL_PARAM("tcp_count_psh",tcp_received_flag_count[4]) , + CTL_PARAM("tcp_count_ack",tcp_received_flag_count[3]) , + CTL_PARAM("tcp_count_urg",tcp_received_flag_count[2]) , + CTL_PARAM("tcp_count_ece",tcp_received_flag_count[1]) , + CTL_PARAM("tcp_count_cwr",tcp_received_flag_count[0]) , +#endif + { 0 }, +}; + +static void __init +register_torus_sysctl(dma_tcp_t *dma_tcp) +{ + dma_tcp->sysctl_table_header=register_sysctl_paths(bgp_torus_ctl_path,bgp_dma_table) ; + TRACEN(k_t_init, "sysctl_table_header=%p",dma_tcp->sysctl_table_header) ; + +} + +/* feature for exploring all-to-all performance with a device in /dev */ +static int bgpdmatcp_add_device(int major, int minor, const char* name); +static int bgpdmatcp_device_open(struct inode *inode, struct file *filp); +static int bgpdmatcp_device_release(struct inode *inode, struct file * filp); +static long bgpdmatcp_device_ioctl( struct file * filp, + unsigned int cmd, unsigned long arg); +enum { + k_bgpdmatcp_major = 126 , + k_bgpdmatcp_minor_nums = 1 +} ; + +struct bgpdmatcp_dev +{ + int major,minor; /* 
device major, minor */ + struct task_struct* current; /* process holding device */ + int signum; /* signal to send holding process */ + wait_queue_head_t read_wq; + int read_complete; + struct semaphore sem; /* interruptible semaphore */ + struct cdev cdev; /* container device? */ +}; + + +static struct bgpdmatcp_dev bgpdmatcp_device; + + +static struct file_operations bgpdmatcp_device_fops = + { + .owner= THIS_MODULE, + .open= bgpdmatcp_device_open, + .read = NULL, + .write= NULL, + .poll= NULL, + .unlocked_ioctl= bgpdmatcp_device_ioctl, + .release= bgpdmatcp_device_release, + .mmap= NULL, + }; + + +static int bgpdmatcp_add_device(int major, + int minor, + const char* devname + ) +{ + int ret; + dev_t devno; + struct bgpdmatcp_dev* dev = &bgpdmatcp_device; + + /* initilize struct */ + init_MUTEX (&dev->sem); + dev->major = major; + dev->minor = minor; + init_waitqueue_head(&dev->read_wq); + dev->read_complete = 0; + devno=MKDEV(major,minor); + + /* register i.e., /proc/devices */ + ret=register_chrdev_region(devno,1,(char *)devname); + + if (ret) { + printk (KERN_WARNING "bgpdmatcp: couldn't register device (%d,%d) err=%d\n", + major,minor,ret); + return ret; + } + + /* add cdev */ + cdev_init(&dev->cdev,&bgpdmatcp_device_fops); + dev->cdev.owner=THIS_MODULE; + dev->cdev.ops=&bgpdmatcp_device_fops; + ret=cdev_add(&dev->cdev,devno,1); + if (ret) { + printk(KERN_WARNING "bgpdmatcp: couldn't register device (%d,%d), err=%d\n", + major,minor,ret); + return ret; + } + + /* signul to pass to owning process, should be altered using ioctl */ + dev->signum=-1; + + + return 0; +} + + +static int bgpdmatcp_device_open (struct inode *inode, struct file *filp) +{ + struct bgpdmatcp_dev *dev=container_of(inode->i_cdev,struct bgpdmatcp_dev,cdev); + + if(down_interruptible(&dev->sem)) return -ERESTARTSYS; + up(&dev->sem); + + dev->current=current; + filp->private_data = (void*) dev; + + return 0; +} + + + + + +static int bgpdmatcp_device_release (struct inode *inode, struct file 
* filp) +{ + struct bgpdmatcp_dev *dev=(struct bgpdmatcp_dev *)filp->private_data; + + /*Ensure exclusive access*/ + if (down_interruptible(&dev->sem)) return -ERESTARTSYS; + + dev->current = NULL; + up(&dev->sem); + + return 0; +} + +/* Report the counts of how often a TCP write has stalled, by stall reason */ +static void bgp_dma_diag_report_hiatus_counts(int __user * report) +{ + copy_to_user(report,tcp_hiatus_counts,k_tcp_hiatus_reasons*sizeof(int)) ; +} + +/* Report bytes read and bytes written over the torus */ +static void bgp_dma_diag_report_transfer_counts(int __user * report) +{ + copy_to_user(report,&dma_tcp_state.bytes_received,sizeof(unsigned long long)) ; + copy_to_user(report+sizeof(unsigned long long)/sizeof(int),&dma_tcp_state.bytes_sent,sizeof(unsigned long long)) ; +} + + +enum { + k_ioctl_activate = 0 , + k_ioctl_wait = 1 , + k_ioctl_clearcount = 2 , + k_ioctl_activate_minicube = 3 , + k_ioctl_wait_sync = 4 , + k_ioctl_activate_to_one = 5 , + k_ioctl_report_tx_queue = 6 , + k_ioctl_report_hiatus_counts = 7 , + k_ioctl_report_bytes_transferred = 8 +}; +static long bgpdmatcp_device_ioctl ( + struct file * filp, + unsigned int cmd, + unsigned long arg) +{ + TRACEN(k_t_detail, "cmd=%d arg=0x%08lx",cmd,arg) ; + + switch (cmd) { + case k_ioctl_activate : + { + int sendBytes ; + if( get_user(sendBytes,(int __user *)arg) ) + { + return -EFAULT ; + } + if( sendBytes <= k_injection_packet_size) + { + dma_tcp_transfer_activate_sync(sendBytes) ; + } +/* else */ +/* { */ +/* dma_tcp_transfer_activate(sendBytes) ; */ +/* } */ + } + break ; +/* #if 0 */ +/* case k_ioctl_wait : */ +/* { */ +/* int demandCount ; */ +/* int rc ; */ +/* if( get_user(demandCount,(int __user *)arg) ) */ +/* { */ +/* return -EFAULT ; */ +/* } */ +/* rc = dma_tcp_transfer_wait(demandCount) ; */ +/* return rc ? 
0 : (-EAGAIN) ; */ +/* } */ +/* break ; */ +/* #endif */ + case k_ioctl_wait_sync : + { + int demandCount ; + int rc ; + if( get_user(demandCount,(int __user *)arg) ) + { + return -EFAULT ; + } + rc = dma_tcp_transfer_wait_sync(demandCount) ; + return rc ? 0 : (-EAGAIN) ; + } + break ; + case k_ioctl_clearcount : + dma_tcp_transfer_clearcount() ; + break ; +/* #if 0 */ +/* case k_ioctl_activate_minicube : */ +/* { */ +/* int sendBytes ; */ +/* if( get_user(sendBytes,(int __user *)arg) ) */ +/* { */ +/* return -EFAULT ; */ +/* } */ +/* dma_tcp_transfer_activate_minicube(sendBytes) ; */ +/* } */ +/* break ; */ +/* case k_ioctl_activate_to_one : */ +/* { */ +/* int sendBytes ; */ +/* unsigned int tg ; */ +/* if( get_user(sendBytes,(int __user *)arg) ) */ +/* { */ +/* return -EFAULT ; */ +/* } */ +/* if( get_user(tg,(int __user *)(arg+sizeof(int))) ) */ +/* { */ +/* return -EFAULT ; */ +/* } */ +/* dma_tcp_transfer_activate_to_one(sendBytes,tg) ; */ +/* } */ +/* break ; */ +/* #endif */ + case k_ioctl_report_tx_queue : + bgp_dma_diag_report_transmission_queue((int __user *)arg) ; + break ; + case k_ioctl_report_hiatus_counts : + bgp_dma_diag_report_hiatus_counts((int __user *)arg) ; + break ; + case k_ioctl_report_bytes_transferred : + bgp_dma_diag_report_transfer_counts((int __user *)arg) ; + break ; + } + return 0; +} + +void __init +dma_tcp_devfs_procfs_init(dma_tcp_t * dma_tcp) +{ + bgpdmatcp_add_device(k_bgpdmatcp_major,0,"bgpdmatcp") ; + register_torus_sysctl(dma_tcp) ; +} diff --git a/drivers/net/bgp_torus/bgp_dma_memcpy.c b/drivers/net/bgp_torus/bgp_dma_memcpy.c new file mode 100644 index 00000000000000..67d515b9057438 --- /dev/null +++ b/drivers/net/bgp_torus/bgp_dma_memcpy.c @@ -0,0 +1,1321 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: copy_tofrom_user using the BGP DMA hardware + * + * + * + ********************************************************************/ +#define REQUIRES_DUMPMEM + +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/syscalls.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/pagemap.h> + + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/time.h> +#include <asm/bitops.h> +#include <asm/div64.h> +#include <linux/vmalloc.h> +#include <asm/atomic.h> + +#include <linux/dma-mapping.h> + +#include <net/inet_connection_sock.h> +#include <net/inet_sock.h> +#include <net/inet_hashtables.h> +#include <net/tcp.h> + + + +/* #include "bglink.h" */ +#include 
<spi/linux_kernel_spi.h> + +#include <asm/time.h> + +#include "bgp_dma_tcp.h" +#include "bgp_bic_diagnosis.h" +#include "../bgp_network/bgdiagnose.h" +#include "../bgp_network/450_tlb.h" +#include "bgp_memcpy.h" + +/* Machine memory geometry */ +enum { + k_l1_line_size = 32 , + k_page_shift = PAGE_SHIFT , + k_page_size = 1 << k_page_shift , + k_page_offset_mask = k_page_size-1 +}; +/* How we are going to use the hardware */ +enum { + k_counters_per_core = 1 , + k_spinlimit = 100000 , + k_requires_fp = 0 , + k_my_vc_for_adaptive = k_VC_anyway + /* k_my_vc_for_adaptive = k_VC_ordering */ +}; +/* What diagnostics/verification are we going to enable */ +enum { +/* k_diagnose = 0 , */ + k_diag_not_mapped = 1 , + k_fromcheck_pre = 0 , + k_fromcheck_post = 0, + k_tocheck_pre = 0, + k_tocheck_post = 0 , + k_check_with_crc = 1 , + k_flush_target_from_l1 = 0 , + k_verify_dma = 1, + k_fixup_faulty_memcpy=1, + k_map_write_check=0 , + k_disable_after_too_many_faults=1 +}; + +/* value to let the counter get to when it is idle --- we do not want '0' because that would mean an interrupt */ +enum { + k_counter_idle_value = 0x00000010 +}; + + +/* For putting an 'msync'in where we don't think we should need it, but helping initial diagnostics */ +static inline void maybe_msync(void) +{ + _bgp_msync() ; +} +/* data cache block flush, evict the given line from L1 if it is there */ +static inline void dcbf(unsigned int a0,unsigned int a1) +{ + asm volatile( "dcbf %[a0],%[a1]" + : + : [a0] "b" (a0), [a1] "b" (a1) + ) ; +} +static inline void dcbf0(unsigned int a) +{ + asm volatile( "dcbf 0,%[a]" + : + : [a] "b" (a) + ) ; +} +static void flush_l1(void * address, unsigned int length) +{ + unsigned int address_int=(unsigned int) address ; + unsigned int address_end_int=address_int+length-1 ; + unsigned int line_start=address_int & ~(k_l1_line_size-1) ; + unsigned int line_end=address_end_int & ~(k_l1_line_size-1) ; + unsigned int line_count=(line_end-line_start)/k_l1_line_size + 1 ; + 
unsigned int x ; + unsigned int flush_address=line_start; + for(x=0;x<line_count;x+=1) + { + dcbf0(flush_address) ; + flush_address += k_l1_line_size ; + } +} +typedef struct { + unsigned int count ; + atomic_t in_use[k_counters_per_core] ; + unsigned int pad_to_line_size[(k_l1_line_size-k_counters_per_core-1)/sizeof(unsigned int)] ; +} core_counter_allocation_t __attribute__((aligned(32))); + +static core_counter_allocation_t counter_allocation[k_injecting_cores] ; + +static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index) ; +static int acquire_counter(void) +{ + unsigned int this_core=smp_processor_id(); + core_counter_allocation_t * cci = counter_allocation + this_core ; + unsigned int prev_count = cci->count++ ; + unsigned int counter_index = prev_count & (k_counters_per_core-1) ; + int in_use = atomic_inc_return(cci->in_use+counter_index) ; + int rc=(1 == in_use) ? (counter_index + this_core*k_counters_per_core) : -1 ; + dma_tcp_t * dma_tcp=&dma_tcp_state ; + TRACEN(k_t_dmacopy,"prev_count=0x%08x counter_index=%d in_use=%d rc=%d",prev_count,counter_index,in_use,rc) ; + if( 1 == in_use) + { + DMA_CounterSetValueBaseMaxHw(dma_tcp->memcpyRecCounterGroup.counter[rc].counter_hw_ptr,k_counter_idle_value,0,0x0fffffff) ; + show_injection_fifo_state(dma_tcp, rc) ; + } + return rc ; + +} +static void release_counter(unsigned int counter) +{ + unsigned int counter_index=counter % k_counters_per_core ; + unsigned int core_index=counter / k_counters_per_core ; + core_counter_allocation_t * cci = counter_allocation + core_index ; + TRACEN(k_t_dmacopy,"counter=%d core_index=%d counter_index=%d in_use=%d",counter,core_index,counter_index,atomic_read(cci->in_use+counter_index)) ; + atomic_set(cci->in_use+counter_index,0) ; +} + +static void cause_fallback(void) +{ + TRACEN(k_t_request,"Turning off DMA memcpy") ; + bgp_memcpy_control.use_dma = 0 ; + dma_memcpy_statistic(k_copy_cause_fallback) ; +} + +static unsigned int find_real_address(const void 
* virtual_address) +{ + struct page *realpage = NULL ; + int res ; + /* Try to fault in all of the necessary pages */ + down_read(&current->mm->mmap_sem); + res = get_user_pages( + current, + current->mm, + (unsigned long) virtual_address, + 1, /* One page */ + 0, /* intent read */ + 0, /* don't force */ + &realpage, + NULL); + up_read(&current->mm->mmap_sem); + + TRACEN(k_t_dmacopy,"find_real_address virtual_address=%p res=%d page=%p pfn=0x%08lx real_address=0x%016llx", + virtual_address,res,realpage,page_to_pfn(realpage),page_to_phys(realpage)) ; + + if( 1 == res) /* Number of pages mapped, should be 1 for this call */ + { + unsigned int rc = page_to_phys(realpage) ; + put_page(realpage) ; + return rc ; + } + return 0 ; + +} + +static unsigned int v_to_r_maybe_show(const void * vaddr) +{ + unsigned int vaddr_int=(unsigned int)vaddr ; + int tlbx=search_tlb_v(vaddr_int) ; + int pageid=get_tlb_pageid(tlbx) ; + int xlat=get_tlb_xlat(tlbx) ; + int attrib=get_tlb_attrib(tlbx) ; + int tlbx1=search_tlb_v((unsigned int)vaddr) ; + if( (tlbx == tlbx1) /* Translation didn't change under me due to e.g.
interrupt */ + && ((pageid & TLB0_V) != 0) /* TLB is valid */ + && ((tlbx & 0x20000000) != 0) /* search_tlb_v sets this bit if it found a translation */ + ) + { + unsigned int epn = TLB0_EPN_1K(pageid) ; // virtual page for the TLB + unsigned int rpn = TLB1_RPN_1K(xlat) ; // real page for the TLB + unsigned int result = (vaddr_int-epn) + rpn ; + TRACEN(k_t_request,"vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x", + vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ; + return result ; + + } + else + { + TRACEN(k_t_request,"vaddr=%p tlbx=0x%08x pageid=0x%08x tlbx1=0x%08x unmapped", + vaddr,tlbx,pageid,tlbx1) ; + tlbx=search_tlb_v(vaddr_int) ; + pageid=get_tlb_pageid(tlbx) ; + xlat=get_tlb_xlat(tlbx) ; + attrib=get_tlb_attrib(tlbx) ; + tlbx1=search_tlb_v((unsigned int)vaddr) ; + { + unsigned int epn = TLB0_EPN_1K(pageid) ; // virtual page for the TLB + unsigned int rpn = TLB1_RPN_1K(xlat) ; // real page for the TLB + unsigned int result = (vaddr_int-epn) + rpn ; + TRACEN(k_t_request,"retry vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x", + vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ; + } + + return (unsigned int) -1 ; // Not mapped + } +} + +static unsigned int v_to_r(const void * vaddr, tlb_t *t) +{ + unsigned int rc=v_to_r_maybe(vaddr,t) ; + unsigned int rc2=v_to_r_maybe(vaddr,t) ; + if( rc != rc2) + { + dma_memcpy_statistic(k_copy_inconsistent_tlb_1_info) ; + rc=rc2 ; + rc2=v_to_r_maybe(vaddr,t) ; + } + if( rc != rc2) + { + + dma_memcpy_statistic(k_copy_inconsistent_tlb_1_rejects) ; + TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_1",vaddr,rc,rc2) ; + return 0xffffffff ; + } + if( 0xffffffff == rc) // Not mapped, touch the address and see what happens + { + unsigned int pageInt ; + int getrc = get_user(pageInt,(unsigned int __user *)vaddr ) ; + _bgp_msync() ; + if( getrc ) + { + TRACEN(k_t_general,"Unmapped : %p",vaddr) ; + rc =(unsigned int) -1 ; // Not mapped 
+ } + else + { + rc=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely + rc2=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely + if( rc != rc2) + { + dma_memcpy_statistic(k_copy_inconsistent_tlb_2_info) ; + rc=rc2 ; + rc2=v_to_r_maybe(vaddr,t) ; + } + if( rc != rc2) + { + dma_memcpy_statistic(k_copy_inconsistent_tlb_2_rejects) ; + TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_2",vaddr,rc,rc2) ; + return 0xffffffff ; + } + dma_memcpy_statistic(k_copy_tlb_touches) ; + } + } + return rc ; +} +static unsigned int v_to_r_write(const void * vaddr, tlb_t *t) +{ + unsigned int rc=v_to_r_maybe(vaddr,t) ; + unsigned int rc2=v_to_r_maybe(vaddr,t) ; + if( rc != rc2) + { + dma_memcpy_statistic(k_copy_inconsistent_tlb_1_info) ; + rc=rc2 ; + rc2=v_to_r_maybe(vaddr,t) ; + } + if( rc != rc2) + { + + dma_memcpy_statistic(k_copy_inconsistent_tlb_1_rejects) ; + TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_1",vaddr,rc,rc2) ; + return 0xffffffff ; + } + if( 0xffffffff == rc) // Not mapped, touch the address and see what happens + { + unsigned int pageInt =0; + int putrc = get_user(pageInt,(unsigned int __user *)vaddr ) ; + _bgp_msync() ; + if( putrc ) + { + TRACEN(k_t_general,"Unmapped : %p",vaddr) ; + rc =(unsigned int) -1 ; // Not mapped + } + else + { + rc=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely + rc2=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely + if( rc != rc2) + { + dma_memcpy_statistic(k_copy_inconsistent_tlb_2_info) ; + rc=rc2 ; + rc2=v_to_r_maybe(vaddr,t) ; + } + if( rc != rc2) + { + dma_memcpy_statistic(k_copy_inconsistent_tlb_2_rejects) ; + TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_2",vaddr,rc,rc2) ; + return 0xffffffff ; + } + dma_memcpy_statistic(k_copy_tlb_touches) ; + } + } + return rc ; +} +static inline void 
create_dma_descriptor_memcpy(dma_tcp_t *dma_tcp, + int injection_counter, + int reception_counter, + dma_addr_t dataAddr, + int msglen, + unsigned int offset, + DMA_InjDescriptor_t *desc + ) +{ + int ret1 __attribute((unused)); + TRACEN(k_t_dmacopy , "(>) memcpying injection_counter=%d reception_counter=%d dataAddr=0x%08llx msglen=0x%08x offset=0x%08x desc=%p",injection_counter,reception_counter,dataAddr,msglen,offset,desc); + if( 0 == msglen) + { + TRACEN(k_t_error , "(E) zero length memcpying injection_counter=%d reception_counter=%d dataAddr=0x%08llx msglen=0x%08x offset=0x%08x desc=%p",injection_counter,reception_counter,dataAddr,msglen,offset,desc); + } + ret1 = DMA_LocalDirectPutDescriptor( desc, + k_InjectionCounterGroup, /* inj cntr group id */ + injection_counter, /* inj counter id */ + dataAddr, /* send offset */ + k_ReceptionCounterGroupMemcpy, /* rec ctr grp */ + reception_counter, + offset, /* reception offset */ + msglen /* message length */ + ); + + TRACEN(k_t_dmacopy , "(<) ret1=%d",ret1); + +} + +static void diagnose_injection_fifo(DMA_InjFifo_t *f_ptr) +{ + int free_space_0 = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo, + 0, /* Use shadow head */ + 0);/* use shadow tail */ + int free_space_1 = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo, + 1, /* Use hardware head */ + 0);/* use shadow tail */ + TRACEN(k_t_request,"free_space_0=0x%08x free_space_1=0x%08x",free_space_0,free_space_1) ; +} + +static void diagnose_injection_fifo_by_id( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id + ) +{ + diagnose_injection_fifo(&fg_ptr->fifos[fifo_id]) ; +} + +static inline int inject_dma_descriptor_memcpy(dma_tcp_t *dma_tcp, + unsigned int desired_fifo, + DMA_InjDescriptor_t *desc) + { + int ret __attribute__((unused)); + TRACEN(k_t_dmacopy , "(>) injecting desired_fifo=%d desc=%p",desired_fifo,desc); + maybe_msync() ; + ret = DMA_InjFifoInjectDescriptorById( &dma_tcp->memcpyInjFifoGroupFrames, + dma_tcp->memcpyInjFifoFramesIds[desired_fifo], + desc ); + maybe_msync() ; + 
if(ret != 1 ) + { + TRACEN(k_t_error,"(!!!) ret=%d",ret) ; + diagnose_injection_fifo_by_id( + &dma_tcp->memcpyInjFifoGroupFrames, + dma_tcp->memcpyInjFifoFramesIds[desired_fifo] + ) ; + + } + + TRACEN(k_t_general , "(<) ret=%d",ret); + return 1 ; + + } +static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index) ; +static int instrument_copy_user_address_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,const void * partner_vaddr,copy_op_t *c) ; + +typedef struct { + void * address ; + const void * partner_address ; + unsigned int size ; +} memcpy_control; + +static unsigned int dma_copy_partial(dma_tcp_t * dma_tcp,unsigned int counter_index, memcpy_control * mc,copy_op_t *c) +{ + void * address = mc->address ; + const void * partner_address = mc->partner_address ; + unsigned int size = mc->size ; + unsigned int address_int = (unsigned int) address ; + unsigned int partner_address_int = (unsigned int ) partner_address ; + + unsigned int address_offset=address_int & k_page_offset_mask ; + unsigned int partner_address_offset=partner_address_int & k_page_offset_mask ; + unsigned int lim_address=min(size,k_page_size-address_offset) ; + unsigned int lim_partner_address=min(size,k_page_size-partner_address_offset) ; + unsigned int lim_size=min(lim_address,lim_partner_address) ; + if( k_diagnose) c->frag_index += 1; + + TRACEN(k_t_dmacopy,"address=%p partner_address=%p size=0x%08x lim_size=0x%05x", + address,partner_address,size,lim_size) ; + + mc->address = address+lim_size ; + mc->partner_address = partner_address+lim_size ; + mc->size = size-lim_size ; + + return instrument_copy_user_address_within_page(dma_tcp,counter_index,address,lim_size,partner_address,c) ; +} + +/* return 0 iff the range described fits within one page */ +static int crosses_page_boundary(const void * address, unsigned int size) +{ + unsigned int a=(unsigned int) address ; + unsigned int ae = a+size-1 ; + return (ae >> 
k_page_shift ) - (a >> k_page_shift) ; +} +static unsigned int dma_copy_full_singlepage(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address,const void * partner_address,unsigned int size,copy_op_t *c) +{ + unsigned int rc ; + TRACEN(k_t_dmacopy,"(>) address=%p partner_address=%p size=0x%08x", + address,partner_address,size) ; + rc=instrument_copy_user_address_within_page(dma_tcp,counter_index,address,size,partner_address,c) ; + TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ; + return rc ; +} +static unsigned int dma_copy_full(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address,const void * partner_address,unsigned int size,copy_op_t *c) +{ + unsigned int rc=0 ; + memcpy_control mc ; + TRACEN(k_t_dmacopy,"(>) address=%p partner_address=%p size=0x%08x", + address,partner_address,size) ; + mc.address=address ; + mc.partner_address=partner_address ; + mc.size=size ; + while(mc.size != 0 && rc == 0) + { + rc |= dma_copy_partial(dma_tcp,counter_index,&mc,c) ; + } + TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ; + return rc ; +} +static unsigned int dma_copy_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index, unsigned int real_address, unsigned int partner_real_address, unsigned int size,copy_op_t *c) +{ + unsigned int full_frame_count=size / k_torus_link_payload_size ; + unsigned int full_frame_size = full_frame_count * k_torus_link_payload_size ; + unsigned int trailing_frame_size = size - full_frame_size ; + unsigned int rc=0 ; + + DMA_InjDescriptor_t desc ; + TRACEN(k_t_dmacopy,"(>) counter_index=%d real_address=0x%08x partner_real_address=0x%08x size=0x%05x full_frame_count=%d full_frame_size=0x%08x trailing_frame_size=0x%08x", + counter_index,real_address,partner_real_address,size,full_frame_count,full_frame_size,trailing_frame_size) ; + if( k_requires_fp) + { + enable_kernel_fp() ; + } + if( full_frame_size > 0 ) + { + create_dma_descriptor_memcpy(dma_tcp,0,counter_index,partner_real_address,full_frame_size,real_address,&desc) ; + 
inject_dma_descriptor_memcpy(dma_tcp,counter_index,&desc) ; + rc = 1 ; + } + if( trailing_frame_size > 0 ) + { + show_injection_fifo_state(dma_tcp,counter_index) ; + create_dma_descriptor_memcpy(dma_tcp,0,counter_index,partner_real_address+full_frame_size,trailing_frame_size,real_address + full_frame_size,&desc) ; + inject_dma_descriptor_memcpy(dma_tcp,counter_index,&desc) ; + rc+=1 ; + } + return rc ; +} + +static void spin_idle(unsigned int idlecount) +{ + unsigned int x ; + for(x=0;x<idlecount;x+=1) + { + asm volatile("nop;"); + } +} + +/* Engage in least-squares regression to estimate data rates */ +dma_statistic_t bgp_dma_rate ; +static void rate_observe(dma_statistic_t * st,int x,int y) +{ + int s1 = st->s1 + 1; + int sx = st->sx + x; + long long int sxx = st->sxx + x*x ; + int sy = st->sy + y ; + long long int sxy = st->sxy + x*y ; + + + st->s1 = s1 ; + st->sx = sx ; + st->sxx = sxx ; + st->sy = sy ; + st->sxy = sxy ; + + if( ((s1 >> 1) & 0xff ) <= bgp_memcpy_control.rate_observe_report_count ) /* Sample a few */ + { + long long det=s1*sxx-((long long)sx)*sx ; + long long m0 = s1*sxy - ((long long)sx)*sy ; + long long m1 = sxx*sy -sx*sxy ; + unsigned long long q0 = m0 ; + unsigned long long q1 = m1 ; + unsigned int uidet = det ; + if( uidet != 0) + { + do_div(q0,uidet) ; + do_div(q1,uidet) ; + } + else + { + q0 = 0 ; + q1 = 0 ; + } + + TRACEN(k_t_request,"x=%d y=%d s1=%d sx=%d sxx=%lld sy=%d sxy=%lld det=%lld m0=%lld m1=%lld q0=%lld q1=%lld", + x,y,s1,sx,sxx,sy,sxy,det,m0,m1,q0,q1) ; + } + +} +static int await_copy_completion(dma_tcp_t * dma_tcp,unsigned int counter_index, unsigned int size ) +{ + int rc=0 ; + unsigned int fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; + unsigned int fifo_initial_head = fifo_current_head ; + unsigned int fifo_tail = + (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, 
dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; + unsigned int spincount = 0 ; + unsigned int initial_rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ; + unsigned int idlecount=bgp_memcpy_control.cycles_per_packet*size/256 ; + TRACEN(k_t_dmacopy,"(>) counter_index=%d size=0x%08x fifo_current_head=0x%08x fifo_tail=0x%08x initial_rec_counter_val=%d idlecount=%d", + counter_index,size,fifo_current_head,fifo_tail,initial_rec_counter_val,idlecount) ; + show_injection_fifo_state(dma_tcp,counter_index) ; + spin_idle(idlecount) ; + maybe_msync() ; + { + int rec_counter_after_idle=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ; + int rec_counter_val = rec_counter_after_idle ; + if( rec_counter_after_idle > 0) + { + rate_observe(&bgp_dma_rate, 0,0) ; + rate_observe(&bgp_dma_rate, idlecount,initial_rec_counter_val-rec_counter_after_idle) ; + } +/* while(fifo_current_head != fifo_tail && spincount < k_spinlimit ) */ +/* { */ +/* fifo_current_head = */ +/* (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; */ +/* // fifo_current_tail = */ +/* // (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; */ +/* spincount += 1 ; */ +/* } */ + while( rec_counter_val > k_counter_idle_value && spincount < k_spinlimit ) + { + maybe_msync() ; + rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ; + spincount += 1 ; + } + maybe_msync() ; + DMA_CounterSetDisableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; + fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; + { +/* unsigned int rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ; */ + dma_memcpy_statistic((0==spincount) ? 
k_copy_await_idle_zero : ((1==spincount) ? k_copy_await_idle_high : k_copy_await_idle_low)) ; + TRACEN(k_t_dmacopy, + "size=0x%08x fifo_initial_head=0x%08x fifo_current_head=0x%08x fifo_tail=0x%08x initial_rec=%d after_idle=%d rec=%d spincount=%d idlecount=%d", + size,fifo_initial_head,fifo_current_head,fifo_tail,initial_rec_counter_val,rec_counter_after_idle,rec_counter_val,spincount,idlecount) ; + if( fifo_current_head != fifo_tail || rec_counter_val != k_counter_idle_value) + { + rc=1 ; + TRACEN(k_t_error,"(E) fifo_current_head=0x%08x fifo_tail=0x%08x spincount=%d rec_counter_val=%d", + fifo_current_head,fifo_tail,spincount,rec_counter_val) ; + } + TRACEN(k_t_dmacopy,"(<) rc=%d fifo_current_head=0x%08x fifo_tail=0x%08x spincount=%d rec_counter_val=%d",rc,fifo_current_head,fifo_tail,spincount,rec_counter_val) ; + } + } + return rc ; +} + +static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index) +{ + unsigned int fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; + unsigned int fifo_current_tail = + (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; + unsigned int rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ; + unsigned int rec_counter_base=DMA_CounterGetBaseHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr) ; + unsigned int rec_counter_max=DMA_CounterGetMaxHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr) ; + unsigned int enabled=DMA_CounterGetEnabled(&dma_tcp->memcpyRecCounterGroup,0) ; + TRACEN(k_t_dmacopy,"counter_index=%d fifo_current_head=0x%08x fifo_current_tail=0x%08x rec_counter_val=0x%08x base=0x%08x max=0x%08x enabled=0x%08x", + counter_index,fifo_current_head,fifo_current_tail,rec_counter_val,rec_counter_base,rec_counter_max,enabled) ; + +} + +static inline int 
next_prbs(int seed) +{ + int ncmask = seed >> 31 ; /* 0x00000000 or 0xffffffff */ + return (seed << 1) ^ (0x04C11DB7 & ncmask) ; /* CRC-32-IEEE 802.3 from http://en.wikipedia.org/wiki/Cyclic_redundancy_check */ +} + +/* Fold one more 32-bit word into the running check value; CRC-style when k_check_with_crc, plain sum otherwise */ +static inline unsigned int rc_revise(unsigned int X0, unsigned int X1) +{ + if(k_check_with_crc) + { + return next_prbs(X0) ^ X1 ; + } + else + { + return X0+X1 ; + } + +} +/* Checksum 'intcount' whole words starting at 'ai' */ +static unsigned int region_check_int(const unsigned int * ai, unsigned int intcount) +{ + unsigned int x ; + unsigned int rc=0 ; + for(x=0;x<intcount;x+=1) + { + rc=rc_revise(rc,*(ai++)) ; + } + return rc ; + +} +/* Checksum an arbitrary byte region: whole words first, then up to 3 trailing bytes */ +static unsigned int region_check(const void * addr, unsigned int size) +{ + const unsigned int * ai = (const unsigned int *) addr ; + unsigned int intcount = size/sizeof(int) ; + unsigned int tailcount = size % sizeof(int) ; + unsigned int rc = region_check_int(ai,intcount) ; + if(tailcount ) + { + const unsigned char * ac = (const unsigned char *) addr ; + unsigned int tail = (ac[size-3] << 16) | (ac[size-2] << 8) | ac[size-1] ; /* fix: middle byte was taken from ac[size-3] twice, so byte size-2 never entered the checksum; NOTE(review): matches the size-3/size-2/size-1 triple used by report_faulty_memcpy/verify_memcpy; if tailcount < 3 this still re-reads bytes already covered by region_check_int and would underflow for size < 3 -- confirm callers always pass size >= 3 */ + rc=rc_revise(rc,tail) ; + } + return rc ; + +} +static void report_faulty_memcpy(void * dest, const void * src, unsigned long size,copy_op_t *c) +{ + unsigned int * di = (unsigned int *) dest ; + const unsigned int * si = (const unsigned int *) src ; + unsigned char * dc = (unsigned char *) (dest) ; + const unsigned char * sc = (const unsigned char *) (src) ; + unsigned int x ; + unsigned int faultwordcount = 0 ; + unsigned int zsourcecount = 0 ; + v_to_r_maybe_show(dest) ; + v_to_r_maybe_show(src) ; + c->to_check_post=region_check(dest,size) ; + if( k_disable_after_too_many_faults) + { + int faults_to_go=bgp_memcpy_control.faults_until_disable-1 ; + if( faults_to_go <= 0 ) + { + cause_fallback() ; + } + else + { + bgp_memcpy_control.faults_until_disable=faults_to_go ; + } + } + dma_memcpy_statistic(k_copy_verify_miscompares) ; + TRACEN(k_t_error,"dest=%p src=%p size=0x%08lx",dest,src,size) ; + for(x=0;x<size/sizeof(unsigned int);x+=1) + { + 
unsigned int sx = si[x] ; + unsigned int dx = di[x] ; + zsourcecount += (0 == sx) ; + if( dx != sx ) + { + if( faultwordcount < 10 ) + { + TRACEN(k_t_error,"(E) x=0x%08x di+x=%p si+x=%p di[x]=0x%08x si[x]=0x%08x", + x,di+x,si+x,dx,sx) ; + } + if( k_fixup_faulty_memcpy) di[x]=sx ; + faultwordcount += 1 ; + } + } + if( dc[size-3] != sc[size-3]) + { + TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x", + size-3,dc+size-3,sc+size-3,dc[size-3],sc[size-3]) ; + if( k_fixup_faulty_memcpy) dc[size-3]=sc[size-3] ; + } + if( dc[size-2] != sc[size-2]) + { + TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x", + size-2,dc+size-2,sc+size-2,dc[size-2],sc[size-2]) ; + if( k_fixup_faulty_memcpy) dc[size-2]=sc[size-2] ; + } + if( dc[size-1] != sc[size-1]) + { + TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x", + size-1,dc+size-1,sc+size-1,dc[size-1],sc[size-1]) ; + if( k_fixup_faulty_memcpy) dc[size-1]=sc[size-1] ; + } + TRACEN(k_t_error,"%d/%ld words incorrectly copied, %d sourcewords were zero",faultwordcount,size/sizeof(unsigned int),zsourcecount) ; + v_to_r_maybe_show(dest) ; + v_to_r_maybe_show(src) ; + show_stack(NULL,0) ; + c->from_check_post=region_check(src,size) ; + diagnose_faulty_copy(c) ; +} +/* Check that a 'memcpy' was accurately done ... 
*/ +static int verify_memcpy(void * dest, const void * src, unsigned long size,copy_op_t *c) +{ + unsigned int * di = (unsigned int *) dest ; + const unsigned int * si = (const unsigned int *) src ; + unsigned char * dc = (unsigned char *) (dest) ; + const unsigned char * sc = (const unsigned char *) (src) ; + unsigned int q = di[0] ^ si[0] ; + unsigned int x ; + dma_memcpy_statistic(k_copy_verify_attempts) ; + TRACEN(k_t_dmacopy,"dest=%p src=%p size=0x%08lx di[0]=0x%08x si[0]=0x%08x",dest,src,size,di[0],si[0]) ; + for(x=1;x<size/sizeof(unsigned int);x+=1) + { + q |= *(++di) ^ *(++si) ; + } + q |= (dc[size-3] ^ sc[size-3]) |(dc[size-2] ^ sc[size-2]) |(dc[size-1] ^ sc[size-1]) ; + if(q) report_faulty_memcpy(dest,src,size,c) ; + return q ; +} + +static int instrument_copy_user_address_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,const void * partner_vaddr,copy_op_t *c) +{ + unsigned int addr_int =(unsigned int) address ; + unsigned int start_page=(addr_int >> k_page_shift) ; + unsigned int end_page=((addr_int+size-1) >> k_page_shift) ; + unsigned int partner_address=(unsigned int) partner_vaddr ; + unsigned int partner_start_page=(partner_address >> k_page_shift) ; + unsigned int partner_end_page=((partner_address+size-1) >> k_page_shift) ; + TRACEN(k_t_dmacopy,"counter_index=%d address=%p size=0x%08lx partner_vaddr=%p start_page=0x%08x end_page=0x%08x partner_start_page=0x%08x partner_end_page=0x%08x", + counter_index,address,size,partner_vaddr,start_page,end_page,partner_start_page,partner_end_page) ; + maybe_msync() ; + if( end_page == start_page && partner_end_page == partner_start_page) + { + unsigned int real_address=v_to_r( address,&c->a_tlb) ; + unsigned int real_address_tablewalk=find_real_address(address) ; + unsigned int partner_real_address=v_to_r_write(partner_vaddr,&c->b_tlb) ; + unsigned int partner_real_address_tablewalk=find_real_address(partner_vaddr) ; + TRACEN(k_t_dmacopy,"address=%p 
real_address=0x%08x r_a_tablewalk=0x%08x partner_vaddr=%p partner_real_address=0x%08x p_r_a_tablewalk=0x%08x",address,real_address,real_address_tablewalk,partner_vaddr,partner_real_address,partner_real_address_tablewalk) ; + if( k_diagnose) + { + c->a_raddress=real_address ; + c->b_raddress=partner_real_address ; + } + if( 0xffffffff != real_address && 0xffffffff != partner_real_address) + { + unsigned int injection_count ; + TRACEN(k_t_dmacopy,"address=%p real_address=0x%08x r_a_tablewalk=0x%08x partner_vaddr=%p partner_real_address=0x%08x p_r_a_tablewalk=0x%08x",address,real_address,real_address_tablewalk,partner_vaddr,partner_real_address,partner_real_address_tablewalk) ; + if( k_flush_target_from_l1) + { + flush_l1(address,size) ; + } + injection_count=dma_copy_within_page(dma_tcp,counter_index,real_address,partner_real_address,size,c) ; + return 0 ; + + } + if( 0xffffffff == real_address ) dma_memcpy_statistic(k_copy_source_tlb_rejects) ; + if( 0xffffffff == partner_real_address ) dma_memcpy_statistic(k_copy_target_tlb_rejects) ; + return 1 ; + } + dma_memcpy_statistic(k_copy_spanpage_rejects) ; + return 1 ; // At least one of the addresses wasn't mapped, or things spanned a page boundary + +} + +static int instrument_copy_user_address(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,dma_addr_t partner_addr, const void * partner_vaddr,copy_op_t *c) +{ + int rc ; + { + rc= dma_copy_full(dma_tcp,counter_index,address, partner_vaddr,size,c) ; + if( 0 == rc) + { + rc = await_copy_completion(dma_tcp,counter_index,size) ; + } + } + if( 0 == rc && k_verify_dma && bgp_memcpy_control.verify_dma) + { + { + rc = verify_memcpy(address, partner_vaddr, size,c) ; + if(rc) + { + TRACEN(k_t_error,"trapped") ; + } + } + } + return rc ; + +} +static int instrument_copy_user_address_singlepage(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,dma_addr_t partner_addr, const void * partner_vaddr,copy_op_t *c) +{ + int 
rc ; + { + rc= dma_copy_full_singlepage(dma_tcp,counter_index, address, partner_vaddr,size,c) ; + if( 0 == rc) + { + rc = await_copy_completion(dma_tcp,counter_index,size) ; + } + } + if( 0 == rc && k_verify_dma && bgp_memcpy_control.verify_dma) + { + { + rc = verify_memcpy(address, partner_vaddr, size,c) ; + if(rc) + { + TRACEN(k_t_error,"trapped") ; + } + } + } + return rc ; + +} +static int instrument_copy_user(void * to, const void * from, unsigned long size,unsigned int counter_index,copy_op_t *c) +{ + dma_tcp_t * dma_tcp=&dma_tcp_state ; + dma_addr_t fromAddr = dma_map_single(NULL, (void *)from, size, DMA_TO_DEVICE); + int rc ; + TRACEN(k_t_dmacopy,"(>)") ; + maybe_msync() ; + DMA_CounterSetValueHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,size+k_counter_idle_value) ; + show_injection_fifo_state(dma_tcp, counter_index) ; + DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; + show_injection_fifo_state(dma_tcp, counter_index) ; + maybe_msync() ; + DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, dma_tcp->injCounterId, 0xffffffff ); + _bgp_msync() ; + rc= instrument_copy_user_address(dma_tcp,counter_index,to,size,fromAddr,(void *)from,c) ; + TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ; + return rc ; +} +static int instrument_copy_user_singlepage(void * to, const void * from, unsigned long size,unsigned int counter_index,copy_op_t *c) +{ + dma_tcp_t * dma_tcp=&dma_tcp_state ; + dma_addr_t fromAddr = dma_map_single(NULL, (void *)from, size, DMA_TO_DEVICE); + int rc ; + TRACEN(k_t_dmacopy,"(>)") ; + maybe_msync() ; + show_injection_fifo_state(dma_tcp, counter_index) ; + DMA_CounterSetValueHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,size+k_counter_idle_value) ; + show_injection_fifo_state(dma_tcp, counter_index) ; + DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; + show_injection_fifo_state(dma_tcp, counter_index) ; + maybe_msync() ; + 
DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, dma_tcp->injCounterId, 0xffffffff ); + _bgp_msync() ; + rc= instrument_copy_user_address_singlepage(dma_tcp,counter_index,to,size,fromAddr,from,c) ; + TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ; + return rc ; +} +static int instrument_copy_tofrom_user(void * to, const void * from, unsigned long size,copy_op_t *c) +{ + int rc=1 ; + int counter_index=acquire_counter() ; + TRACEN(k_t_dmacopy,"(>) to=%p from=%p size=0x%08lx counter_index=%d",to,from,size,counter_index) ; + if( counter_index >= 0) + { + rc= instrument_copy_user(to,from,size,counter_index,c) ; + release_counter(counter_index) ; + } + else + { + dma_memcpy_statistic(k_copy_no_counter_rejects) ; + } + TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ; + return rc ; +} + +static int instrument_copy_tofrom_user_singlepage(void *to, const void * from, unsigned long size,copy_op_t *c) +{ + int rc=1 ; + int counter_index=acquire_counter() ; + TRACEN(k_t_dmacopy,"(>) to=%p from=%p size=0x%08lx counter_index=%d",to,from,size,counter_index) ; + if( counter_index >= 0) + { + rc= instrument_copy_user_singlepage(to,from,size,counter_index,c) ; + release_counter(counter_index) ; + } + else + { + dma_memcpy_statistic(k_copy_no_counter_rejects) ; + } + TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ; + return rc ; +} + +static int all_pages_mapped_read(unsigned long address, unsigned long size) +{ + unsigned int start_page=(address >> k_page_shift) ; + unsigned int end_page=((address+size) >> k_page_shift) ; + unsigned int page_count = end_page-start_page+1 ; + unsigned int x ; + if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK + /* Defend against the possibility that the user application has posted an unmapped address */ + for(x=0;x<page_count;x+=1) + { + int pageInt ; + int __user * pageIntP = (int __user *) ((start_page+x) << k_page_shift) ; + if( get_user(pageInt,pageIntP) ) + { + TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x 
page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ; + if( k_diag_not_mapped) + { + tlb_t t ; + unsigned int r=v_to_r_maybe((void *)address, &t) ; + TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ; + TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ; + diagnose_tlb(&t) ; + } + + return 1; + } + + } + return 0 ; +} + +/* Probe each page of [address, address+size) with put_user; returns 1 if any page is not writably mapped, 0 if all are (kernel addresses assumed OK) */ +static int all_pages_mapped_write(unsigned long address, unsigned long size) +{ + unsigned int start_page=(address >> k_page_shift) ; + unsigned int end_page=((address+size) >> k_page_shift) ; + unsigned int page_count = end_page-start_page+1 ; + unsigned int x = 0 ; /* fix: x was read uninitialized (UB) in the first put_user-failure TRACEN below, before the for loop ever assigns it; 0 makes that diagnostic name the first page, which is the one probed */ +/* int pageInt ; */ + char __user * pageCharP = (char __user *) address ; + if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK + if(put_user(0,pageCharP)) + { + TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ; + if( k_diag_not_mapped) + { + tlb_t t ; + unsigned int r=v_to_r_maybe((void *)address, &t) ; + TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ; + TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ; + diagnose_tlb(&t) ; + } + + return 1; + } + /* Defend against the possibility that the user application has posted an unmapped address */ + for(x=1;x<page_count;x+=1) + { +/* int pageInt ; */ + char __user * pageCharP = (char __user *) ((start_page+x) << k_page_shift) ; +/* TODO: Fix this up against the possibility of 0..2 bytes at the start of the last page */ + if( put_user(0,pageCharP) ) + { + 
TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ; + if( k_diag_not_mapped) + { + tlb_t t ; + unsigned int r=v_to_r_maybe((void *)address, &t) ; + TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ; + TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ; + diagnose_tlb(&t) ; + } + + return 1; + } + + } + return 0 ; +} + +/* Returns 1 if we could DMA-copy things, 0 if we couldn't */ +extern unsigned long bgp_dma_instrument_copy_tofrom_user(void *to, + const void *from, unsigned long size) +{ + TRACEN(k_t_general,"to=%p from=%p size=0x%08lx",to,from,size) ; + dma_memcpy_statistic(k_copy_tofrom_user_calls) ; + if( size > 0 && size >= bgp_memcpy_control.dma_threshold ) + { + copy_op_t c ; + TRACEN(k_t_dmacopy,"to=%p from=%p size=0x%08lx",to,from,size) ; + if( all_pages_mapped_read((unsigned long) from,size)) + { + dma_memcpy_statistic(k_copy_source_rejects) ; + return 1 ; + } + if( k_map_write_check && all_pages_mapped_write((unsigned long) to,size)) + { + dma_memcpy_statistic(k_copy_target_rejects) ; + return 1 ; + } + if( k_diagnose) + { + c.to_vaddr=to ; + c.from_vaddr=(void *)from ; + c.size=size ; + c.frag_index=0 ; + c.from_check_post = 0xffffffff ; + c.to_check_pre = 0xffffffff ; + c.to_check_post = 0xffffffff ; + if(k_fromcheck_pre) + { + c.from_check_pre=region_check((void *)from,size) ; + } + else + { + c.from_check_pre = 0xffffffff ; + } + if(k_tocheck_pre) + { + c.to_check_pre=region_check(to,size) ; + } + else + { + c.to_check_pre = 0xffffffff ; + } + } + + + if( crosses_page_boundary(from,size) || crosses_page_boundary(to,size)) + { + if( bgp_memcpy_control.handle_pagecrossing) + { + + unsigned long rc= instrument_copy_tofrom_user(to,from,size,&c) ; + 
dma_memcpy_statistic((0==rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ; + TRACEN(k_t_dmacopy,"rc=%ld",rc) ; + if(k_diagnose && 0 == rc ) + { + if(k_fromcheck_post) + { + c.from_check_post=region_check(from,size) ; + } + if(k_tocheck_post) + { + c.to_check_post=region_check(to,size) ; + } + if( (k_fromcheck_pre && k_fromcheck_post && c.from_check_post != c.from_check_pre) + || + (k_fromcheck_pre && k_tocheck_post && c.from_check_pre != c.to_check_post) + || + (k_fromcheck_post && k_tocheck_post && c.from_check_post != c.to_check_post) + ) + { + diagnose_faulty_copy(&c) ; + return 1 ; + } + } + return rc ; + } + else + { + dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ; + return 1 ; + } + } + else + { + { + unsigned long rc= instrument_copy_tofrom_user_singlepage(to,from,size,&c) ; + dma_memcpy_statistic((0==rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ; + TRACEN(k_t_dmacopy,"rc=%ld",rc) ; + if(k_diagnose && 0 == rc ) + { + if(k_fromcheck_post) + { + c.from_check_post=region_check(from,size) ; + } + if(k_tocheck_post) + { + c.to_check_post=region_check(to,size) ; + } + if( (k_fromcheck_pre && k_fromcheck_post && c.from_check_post != c.from_check_pre) + || + (k_fromcheck_pre && k_tocheck_post && c.from_check_pre != c.to_check_post) + || + (k_fromcheck_post && k_tocheck_post && c.from_check_post != c.to_check_post) + ) + { + diagnose_faulty_copy(&c) ; + return 1 ; + } + } + + return rc ; + } + + } + } + dma_memcpy_statistic(k_copy_size_rejects) ; + return 1 ; // Not copied, size under threshold + +} + +static struct ctl_table dma_memcpy_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "counter_allocation_0", + .data = counter_allocation+0, + .maxlen = sizeof(core_counter_allocation_t), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "counter_allocation_1", + .data = counter_allocation+1, + .maxlen = sizeof(core_counter_allocation_t), + .mode = 0644, + 
.proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "counter_allocation_2", + .data = counter_allocation+2, + .maxlen = sizeof(core_counter_allocation_t), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "counter_allocation_3", + .data = counter_allocation+3, + .maxlen = sizeof(core_counter_allocation_t), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { 0 }, +} ; + +static struct ctl_path dma_memcpy_ctl_path[] = { + { .procname = "bgp", .ctl_name = 0, }, + { .procname = "dmacopy", .ctl_name = 0, }, + { }, +}; + +static void __init +bgp_dma_memcpy_init_counter_allocation(void) +{ + unsigned int core_index ; + register_sysctl_paths(dma_memcpy_ctl_path,dma_memcpy_table) ; + for(core_index=0;core_index<k_injecting_cores;core_index+=1) + { + core_counter_allocation_t * cci = counter_allocation + core_index ; + unsigned int counter_index ; + cci->count = 0; + for(counter_index=0;counter_index<k_counters_per_core;counter_index+=1) + { + atomic_set(cci->in_use+counter_index,0) ; + } + + } + TRACEN(k_t_init,"counter_allocation initialised") ; +} + +/* This gets driven in the FLIH when a DMA interrupt occurs */ +static void dummyCounterZeroHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4) +{ + TRACEN(k_t_error,"(>) Unexpected interrupt" ); + TRACEN(k_t_error,"(<)" ); +} + +/* 'copyin/out' via the BGP DMA is believed functional, but seems not useful since copying via the parallel FP regs */ +/* seems to run faster, even in cases where that wipes out the L1 cache. Code is left here in case someone wants to */ +/* try improving it, and to indicate which sections of the BGP DMA unit (injection fifo and reception counters) are needed */ +/* to make it work. 
*/ +void __init +bgp_dma_memcpyInit(dma_tcp_t * dma_tcp) +{ + bgp_dma_memcpy_init_counter_allocation() ; + { + int counter_index ; + for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1 ) + { + dma_tcp->memcpyInjFifoFramesPri[ counter_index ] = 0 ; + dma_tcp->memcpyInjFifoFramesLoc[ counter_index ] = 1 ; + dma_tcp->memcpyInjFifoFramesIds[ counter_index ] = counter_index ; + dma_tcp->memcpyInjFifoFramesMap[ counter_index ] = 0; /* 'memcpy' injector not connected to torus */ + } + } + { + int ret = DMA_InjFifoGroupAllocate( k_InjectionFifoGroupMemcpy, + k_injecting_cores, /* num inj fifos */ + dma_tcp->memcpyInjFifoFramesIds, + dma_tcp->memcpyInjFifoFramesPri, + dma_tcp->memcpyInjFifoFramesLoc, + dma_tcp->memcpyInjFifoFramesMap, + NULL, + NULL, + NULL, + NULL, + NULL, + & dma_tcp->memcpyInjFifoGroupFrames ); + + TRACEN(k_t_init,"(=)DMA_InjFifoGroupAllocate rc=%d", ret ); + + if( 0 == ret) + { + int counter_index ; + for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1 ) + { + TRACEN(k_t_init,"fg_ptr=%p fifo_id=%d va_start=%p va_head=%p va_end=%p", + &dma_tcp->memcpyInjFifoGroupFrames, + dma_tcp->memcpyInjFifoFramesIds[counter_index], + dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo, + dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo, + dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo+1 + ) ; + { + int ret = DMA_InjFifoInitById( &dma_tcp->memcpyInjFifoGroupFrames, + dma_tcp->memcpyInjFifoFramesIds[counter_index], + dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo, + dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo, /* head */ + dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo+1 /* end */ + ); + + dma_tcp->idma.idma_core[counter_index].memcpy_fifo_initial_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; + TRACEN(k_t_init,"(=)DMA_InjFifoInitById rc=%d initial_head=0x%08x", ret , 
dma_tcp->idma.idma_core[counter_index].memcpy_fifo_initial_head); + } + } + } + /* Set up a reception counter for 'memcpy' */ + { + /* Initialize reception counter group */ + int ret __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Reception, + k_ReceptionCounterGroupMemcpy, /* group number */ + DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP, + dma_tcp->memcpyRecCntrSubgrps, +/* TODO: Not really taking interrupts from this counter group, but maybe it has to be coherent ? */ +// 0, /* target core for interrupts */ +// NULL, /* Not planning to take interrupts from memcpy counters */ + 2, /* target core for interrupts */ + dummyCounterZeroHandler, + NULL, + NULL, + & dma_tcp->memcpyRecCounterGroup ); + TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret ); + } +/* { */ +/* int counter_index ; */ +/* for( counter_index=0; counter_index< DMA_NUM_COUNTERS_PER_GROUP; counter_index += 1 ) */ +/* { */ +/* DMA_CounterSetDisableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; */ +/* DMA_CounterSetValueBaseMaxHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,k_counter_idle_value,0,0xffffffff) ; */ +/* } */ +/* _bgp_msync() ; */ +/* // for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1 ) */ +/* // { */ +/* // DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; */ +/* // } */ +/* _bgp_msync() ; */ +/* } */ + + + +} +} diff --git a/drivers/net/bgp_torus/bgp_dma_tcp.c b/drivers/net/bgp_torus/bgp_dma_tcp.c new file mode 100644 index 00000000000000..9e63e4e664db51 --- /dev/null +++ b/drivers/net/bgp_torus/bgp_dma_tcp.c @@ -0,0 +1,931 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: Blue Gene low-level driver for sockets over torus + * + * Intent: Send a 'request block' to the partner's memory FIFO + * Partner initiates a 'remote read' from me + * Partner sends a 'response block' to my FIFO to say the data is transferred + * + ********************************************************************/ +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/syscalls.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> + +#include <linux/cpu.h> +#include <linux/cpuset.h> +#include <linux/bootmem.h> + + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/time.h> +#include <linux/vmalloc.h> + +#include <linux/dma-mapping.h> + +#include <net/inet_connection_sock.h> +#include <net/inet_sock.h> +#include <net/inet_hashtables.h> +#include <net/tcp.h> +#include <net/tcp_hiatus.h> + +#include <spi/linux_kernel_spi.h> + +#include "bgp_dma_tcp.h" + +#include "bgp_bic_diagnosis.h" +#include "../bgp_network/bgdiagnose.h" + +/* #define TRUST_TORUS_CRC */ + +#define SEND_SHORT_FRAMES_INLINE 
+#define ENABLE_TUNING + +#define ENABLE_LEARNING_ADDRESSES + +#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI) +/* Select operation with linux 'dev->poll' */ +#define TORNIC_DEV_POLL + +/* #if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC) */ +/* #define TORNIC_STEAL_POLL_CORE */ +/* #endif */ + +#endif + + +/* #define REQUIRES_DUMPMEM */ + +/* #if defined(CONFIG_BLUEGENE_TORUS_TRACE) */ +/* int bgp_dma_tcp_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */ +int bgp_dma_tcp_tracemask = k_t_init | k_t_request | k_t_error | k_t_congestion ; // | k_t_scattergather ; +/* int bgp_dma_tcp_tracemask = k_t_init | k_t_request | k_t_error | k_t_congestion |k_t_irqflow|k_t_irqflow_rcv; */ +/* int bgp_dma_tcp_tracemask = 0xffffffff ; */ +/* int bgp_dma_tcp_tracemask = k_t_request | k_t_error ; */ +/* #endif */ + +/* extern int sysctl_somaxconn ; // listening socket backlog, will want to increase this to allow at least 'n' SYNs per node in the block */ +/* #define DEBUG_CLEAR_SKB */ + +//extern int bgp_dma_irq ; /* Interrupt number that the torus is using */ + +enum { + k_fifo_irq = 124 , /* Linux interrupt number for 'fifo threshold crossing' interrupt */ + k_rec_counter_irq = 132 /* Linux interrupt number for 'reception counter hit zero' interrupt */ +}; + +enum { + k_find_source_of_rst_flags = 1 /* Whether to enable making a fuss about the source of a 'rst' frame */ +}; + +#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) +#define TORNIC_TORUS_AFFINITY +#endif + +enum { + k_TorusAffinityCPU = +#if defined(TORNIC_TORUS_AFFINITY) + 2 +#else + 0 +#endif +}; + +extern cpumask_t cpu_nouser_map; /* Added to support 'steal' of core prior to long-running softirq */ + +int __init +dma_tcp_module_init (void); +/* void __exit dma_tcp_module_cleanup (void); */ + +/* module_init(dma_tcp_module_init); */ +/* module_exit(dma_tcp_module_cleanup); */ + +#if defined(CONFIG_BGP_STATISTICS) +int 
rtt_histogram[33] ; +int transit_histogram[33] ; +#endif + + +MODULE_DESCRIPTION("BG/P sockets over torus DMA driver"); +MODULE_LICENSE("GPL"); + + +#define TCP_DMA_NAME "tcp_bgp_dma" +#ifndef CTL_UNNUMBERED +#define CTL_UNNUMBERED -2 +#endif + +/* Routines related to interrupt management from bgp_bic.c */ +void bic_disable_irq(unsigned int irq) ; /* Intended to be called from a FLIH to indicate that this interrupt will not fire again */ +void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ; /* Intended to indicate which core will take the next interrupt of this type. Doesn't explocitly enable but other async things may enable */ +void bic_unmask_irq(unsigned int irq) ; /* Explicitly enable this interrupt */ + + + +#define ENABLE_TIMESTAMP_TRACKING +enum { + k_FLIH_Entry , + k_FLIH_Exit , + k_SLIH_Entry , + k_SLIH_Exit , + k_Poll_Entry , + k_Poll_Exit , + k_Enable , + k_CouldEnable , + k_Quantity +}; + +static char *timestamp_names[] = { + "k_FLIH_Entry" , + "k_FLIH_Exit" , + "k_SLIH_Entry" , + "k_SLIH_Exit" , + "k_Poll_Entry" , + "k_Poll_Exit" , + "k_Enable" , + "k_CouldEnable" +}; + +typedef struct { + unsigned int hi ; + unsigned int lo ; +} timestamp_t ; + +#if defined(ENABLE_TIMESTAMP_TRACKING) +enum { + k_TimestampRingSize = 8 +}; + +typedef struct { + unsigned int current_index ; + timestamp_t timestamp[k_TimestampRingSize] ; +} timestamp_ring_t; + +static timestamp_ring_t timestamp_ring[k_Quantity] ; +#endif + +static void record_timestamp(unsigned int x) + { +#if defined(ENABLE_TIMESTAMP_TRACKING) + unsigned int tbhi = get_tbu(); + unsigned int tblo = get_tbl(); + unsigned int tbhi2 = get_tbu(); + unsigned int tblo2 = ( tbhi == tbhi2 ) ? 
tblo : 0 ; + timestamp_ring_t *tr = timestamp_ring+x ; + unsigned int cx=tr->current_index ; + unsigned int cxm=cx&(k_TimestampRingSize-1) ; + tr->timestamp[cxm].hi = tbhi2 ; + tr->timestamp[cxm].lo = tblo2 ; + TRACEN(k_t_detail,"Timestamp %s[%d] = 0x%08x%08x",timestamp_names[x],cx,tbhi2,tblo2) ; + tr->current_index=cx+1 ; +#endif + } + +static void show_timestamps(void) + { +#if defined(ENABLE_TIMESTAMP_TRACKING) + int x ; + TRACEN(k_t_detail,"(>)") ; + for(x=0;x<k_Quantity;x+=1) + { + timestamp_ring_t *tr = timestamp_ring+x ; + unsigned int cx=tr->current_index ; + int q ; + for(q=-k_TimestampRingSize;q<0 ; q+=1) + { + unsigned int cxm=(cx+q)&(k_TimestampRingSize-1) ; + TRACEN(k_t_request,"Timestamp %s[%03d] = 0x%08x%08x",timestamp_names[x],q,tr->timestamp[cxm].hi,tr->timestamp[cxm].lo) ; + } + } + TRACEN(k_t_detail,"(<)") ; +#endif + } + +static void init_tuning(dma_tcp_t *dma_tcp) + { +#if defined(CONFIG_BLUEGENE_TCP) + dma_tcp->bluegene_tcp_is_built = 1 ; +#else + dma_tcp->bluegene_tcp_is_built = 0 ; +#endif + dma_tcp->tuning_num_packets = 0x7fffffff ; /* up from '1', used 16 at one time */ +#if defined(KEEP_TCP_FLAG_STATS) + dma_tcp->tcp_received_flag_count[0] = 0 ; + dma_tcp->tcp_received_flag_count[1] = 0 ; + dma_tcp->tcp_received_flag_count[2] = 0 ; + dma_tcp->tcp_received_flag_count[3] = 0 ; + dma_tcp->tcp_received_flag_count[4] = 0 ; + dma_tcp->tcp_received_flag_count[5] = 0 ; + dma_tcp->tcp_received_flag_count[6] = 0 ; + dma_tcp->tcp_received_flag_count[7] = 0 ; +#endif +#if defined(TORNIC_DEV_POLL) +#if defined(TORNIC_STEAL_POLL_CORE) + /* dma_tcp->tuning_num_empty_passes = 1000000 ; // Try 1 second 'spin' if no data coming */ + dma_tcp->tuning_num_empty_passes = 5000 ; /* Try 5 millisecond 'spin' if no data coming if we have a whole core for it */ + dma_tcp->tuning_non_empty_poll_delay = 850 ; +#else + /* Sharing a core, but with 'poll' NAPI */ + dma_tcp->tuning_num_empty_passes = 1 ; /* Try 10 microsecond 'spin' if no data coming if we are sharing 
core with app */ + dma_tcp->tuning_non_empty_poll_delay = 1 ; +#endif +#else + /* 'interrupts' NAPI */ + dma_tcp->tuning_num_empty_passes = 1 ; /* Try 10 microsecond 'spin' if no data coming if we are sharing core with app */ + dma_tcp->tuning_non_empty_poll_delay = 1 ; +#endif + dma_tcp->tuning_poll_after_enabling = 1 ; /* changed from 0 on 20080619 */ + dma_tcp->tuning_run_handler_on_hwi = 0 ; /* was 1 */ + dma_tcp->tuning_clearthresh_slih = 1 ; /* = 0 , whether to clear the 'threshold crossed' bit in the slih */ + dma_tcp->tuning_clearthresh_flih = 0 ; /* = 0 , whether to clear the 'threshold crossed' bit in the flih */ + dma_tcp->tuning_disable_in_dcr = 1 ; /* = 1, whether to toggle the DCR interrupt enable/disable */ + dma_tcp->tuning_exploit_reversepropose = 1 ; /* which way to run the propose/accept protocol */ + dma_tcp->tuning_counters_per_source = 0 ; /* Max reception counters to commit per source node (0 indicates to use 'shareout' algorithm */ + dma_tcp->tuning_min_icsk_timeout = 200 ; /* Push TCP timeout on torus up to 200 jiffies, we think we have a reliable network ... 
*/ + dma_tcp->tuning_injection_hashmask = 3 ; /* = 3, whether to mask down the number of injection fifos per direction */ + dma_tcp->tuning_virtual_channel = k_VC_anyway ; /* Select adaptive routing at boot time */ + } + +dma_tcp_t dma_tcp_state ; + + +/* void __exit */ +/* dma_tcp_module_cleanup (void) */ +/* { */ +// /* nothing to do */ +/* } */ + + + +/* #if defined(CONFIG_BLUEGENE_TCP) */ +#if 1 +static int bgp_dma_tcp_poll(dma_tcp_t *) ; +static int bgp_dma_tcp_poll(dma_tcp_t *dma_tcp) +{ +/* Values when I inherited the code, now taken from 'tuning params' */ +/* int num_packets = 1; // received packets one by one */ +/* int num_empty_passes = 512; */ +/* int non_empty_poll_delay = 850; */ +/* Other values I have tried */ +/* int num_packets = 100; */ +/* int num_empty_passes = 0; */ +/* int non_empty_poll_delay = 0; */ +/* int num_packets = 100; // received packets 100 at a time */ +/* int num_empty_passes = 5; */ +/* int non_empty_poll_delay = 10; */ +/* dumpmem(dma_tcp_state.receptionFIFO,128,"Reception memory FIFO") ; */ + + int ret ; + TRACEN(k_t_irqflow, "(>) tuning_num_packets=%d tuning_num_empty_passes=%d tuning_non_empty_poll_delay=%d", + dma_tcp->tuning_num_packets,dma_tcp->tuning_num_empty_passes,dma_tcp->tuning_non_empty_poll_delay ); + dma_tcp->device_stats = bgtornet_stats() ; + ret = DMA_RecFifoPollNormalFifoById( dma_tcp->tuning_num_packets, + recFifoId, + dma_tcp->tuning_num_empty_passes, + dma_tcp->tuning_non_empty_poll_delay, + dma_tcp->recFifoGroup, + bgp_dma_tcp_empty_fifo_callback); + touch_softlockup_watchdog() ; /* If we get a continuous stream of packets, we do not really want the softlockup watchdog to bark */ + TRACEN(k_t_irqflow, "(<) ret=%d",ret ); +/* ASSERT( ret >= 0 ); */ + return ret; +} + + +static void recfifo_disable(void) + { + TRACEN(k_t_detail,"(><)") ; + mtdcrx(0xd71,0) ; + } + +static void recfifo_enable(void) + { + TRACEN(k_t_detail,"(><)") ; + record_timestamp(k_Enable) ; + mtdcrx(0xd71,0x80000000) ; + } + +static 
void reccounter_disable(void) + { + TRACEN(k_t_detail,"(><)") ; + mtdcrx(0xd7a,0) ; + } + +static void reccounter_enable(void) + { + TRACEN(k_t_detail,"(><)") ; + record_timestamp(k_Enable) ; + mtdcrx(0xd7a,0xffffffff) ; + } + +static void dma_tcp_slih_handler(unsigned long dummy) + { + int ret; + dma_tcp_t *dma_tcp = &dma_tcp_state ; + record_timestamp(k_SLIH_Entry) ; + + TRACEN(k_t_irqflow,"(>)" ); + enable_kernel_fp() ; + ret = bgp_dma_tcp_poll(dma_tcp); + /* Clear the 'threshold crossed' flag so we don't automatically reinterrupt */ + DMA_RecFifoSetClearThresholdCrossed( dma_tcp_state.recFifoGroup, + 0x80000000, + 0 ); +#if defined(HAS_MISSED_INTERRUPT_TIMER) + mod_timer(&dma_tcp->torus_missed_interrupt_timer, jiffies+200) ; /* Cause timer interrupt after 2000ms if things don't stay alive ... temp while diagnosing problem ... */ +#endif + record_timestamp(k_SLIH_Exit) ; +#if !defined(TORNIC_DEV_POLL) + recfifo_enable() ; + reccounter_enable() ; +#endif + TRACEN(k_t_irqflow,"(<)" ); + } + +static void trip_missed_interrupt(dma_tcp_t *dma_tcp) +{ + unsigned int fifo_dcr = mfdcrx(0xd71) ; + unsigned int counter_dcr = mfdcrx(0xd7a) ; + struct bic_regs * bic_regs = bic.regs ; + unsigned int target_2_3 = bic_regs->group[2].target[3] ; + unsigned int target_3_0 = bic_regs->group[3].target[0] ; + unsigned int notEmpty = DMA_RecFifoGetNotEmpty(dma_tcp->recFifoGroup,0) ; + unsigned int thresholdCrossed = DMA_RecFifoGetThresholdCrossed(dma_tcp->recFifoGroup,0) ; + if( fifo_dcr != 0x80000000 || counter_dcr != 0xffffffff || target_2_3 != 0x00006000 || target_3_0 != 0x00006000 || notEmpty != 0 ) + { + TRACEN(k_t_general,"maybe missed interrupt fifo_dcr=0x%08x counter_dcr=0x%08x target_2_3=0x%08x target_3_0=0x%08x notEmpty=0x%08x thresholdCrossed=0x%08x", + fifo_dcr,counter_dcr,target_2_3,target_3_0,notEmpty,thresholdCrossed) ; + dma_tcp_slih_handler(0) ; + } +} +#if defined(HAS_MISSED_INTERRUPT_TIMER) +static void dma_tcp_missed_interrupt(unsigned long dummy) +{ + dma_tcp_t 
*dma_tcp = &dma_tcp_state ; + TRACEN(k_t_irqflow,"(>)") ; + trip_missed_interrupt(dma_tcp) ; + mod_timer(&dma_tcp->torus_missed_interrupt_timer, jiffies+10) ; /* Cause timer interrupt after 100ms if things don't stay alive ... temp while diagnosing problem ... */ + TRACEN(k_t_irqflow,"(<)") ; +} +#endif +static volatile int dma_ticket_req ; +static volatile int dma_ticket_ack ; + +void dma_tcp_poll_handler(void) + { + int cur_ticket_req = dma_ticket_req ; + record_timestamp(k_Poll_Entry) ; + + dma_ticket_ack = cur_ticket_req ; + TRACEN(k_t_irqflow,"dma_tcp_poll_handler: cur_ticket_req=%d (>)",cur_ticket_req ); + dma_tcp_slih_handler(0) ; + TRACEN(k_t_irqflow,"dma_tcp_poll_handler: cur_ticket_req=%d (<)",cur_ticket_req ); + record_timestamp(k_Poll_Exit) ; + } + +void dma_tcp_rx_enable(void) + { + unsigned long flags ; + TRACEN(k_t_irqflow,"(>)" ); + record_timestamp(k_CouldEnable) ; + recfifo_enable() ; + reccounter_enable() ; + bic_set_cpu_for_irq(k_fifo_irq,k_TorusAffinityCPU) ; + bic_set_cpu_for_irq(k_rec_counter_irq,k_TorusAffinityCPU) ; + /* Both interrupts unmasked before we take one to avoid the chance of an interrupt after the first */ + /* which (?) could go round the loop and 'do the wrong thing' with respect to napi and enabling the second */ + /* while trying to run the napi poll */ + local_irq_save(flags) ; + bic_unmask_irq(k_fifo_irq) ; + bic_unmask_irq(k_rec_counter_irq) ; + local_irq_restore(flags) ; + /* If we get here and there's an 'interrupt cause' in the DCRs, we have missed an interrupt. Trace it and fire the SLIH. 
*/ + trip_missed_interrupt(&dma_tcp_state ) ; + TRACEN(k_t_irqflow,"(<)" ); + + } + +static DECLARE_TASKLET(dma_tcp_slih, dma_tcp_slih_handler,0) ; + +/* This gets driven in the FLIH when a DMA interrupt occurs */ +static void receiveFLIH(u32 arg1, u32 arg2, u32 arg3, u32 arg4) +{ + TRACEN(k_t_irqflow,"(>) FLIH" ); + record_timestamp(k_FLIH_Entry) ; + bic_disable_irq(k_fifo_irq) ; + bic_disable_irq(k_rec_counter_irq) ; + bgtornet_rx_schedule() ; + record_timestamp(k_FLIH_Exit) ; + TRACEN(k_t_irqflow,"(<) FLIH" ); +} + +static void receiveCommHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4) +{ + TRACEN(k_t_irqflow,"(>)" ); + recfifo_disable() ; + receiveFLIH(arg1,arg2,arg3,arg4) ; + TRACEN(k_t_irqflow,"(<)" ); +} + +/* This gets driven in the FLIH when a DMA interrupt occurs */ +static void receiveCounterZeroHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4) +{ + TRACEN(k_t_irqflow,"(>)" ); + reccounter_disable() ; + receiveFLIH(arg1,arg2,arg3,arg4) ; + TRACEN(k_t_irqflow,"(<)" ); +} + + +static int unknownActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) + { + unsigned int SW_Arg __attribute__ ((unused)) =packet_ptr->SW_Arg ; + unsigned int Func_Id __attribute__ ((unused)) =packet_ptr->Func_Id ; + unsigned int x __attribute__ ((unused)) =SW_Arg >> 16 ; + unsigned int y __attribute__ ((unused)) =( SW_Arg >> 8) & 0xff ; + unsigned int z __attribute__ ((unused)) =SW_Arg & 0xff ; + TRACEN(k_t_error,"(!!!) 
%08x %02x (%02x,%02x,%02x) payload_ptr=%p payload_bytes=%d", SW_Arg,Func_Id,x,y,z,payload_ptr, payload_bytes ); + return 0 ; + } + +/* static char reception_fifo_buffer[k_desired_reception_memory_fifo_size] __attribute__ ((__aligned__(32))) ; */ +/* We need a reception FIFO; we are prepared to compromise on its size */ +static void __init +dma_tcp_setup_reception_fifo(dma_tcp_t *dma_tcp) + { + unsigned int allocation_size=k_desired_reception_memory_fifo_size ; + void * allocation_address=local_permanent_alloc(k_desired_reception_memory_fifo_size) ; + dma_tcp->receptionfifo = allocation_address ; + dma_tcp->receptionfifoSize = allocation_size ; + /* Must get a memory FIFO area, and it must be L1-aligned */ + BUG_ON(allocation_address == NULL) ; + BUG_ON(0 != (0x1f & (int)allocation_address)) ; + if( allocation_address != NULL ) + { + memset(allocation_address, 0xcc, allocation_size) ; + } + TRACEN(k_t_init,"reception_fifo address=%p length=%d=0x%08x",allocation_address,allocation_size,allocation_size) ; + } + +#endif + + +void __init +bgp_fpu_register_memcpy_sysctl(void) ; + +enum +{ + k_enable_dma_memcpy = 1 +} ; + +static void __init +dma_tcp_init(dma_tcp_t *dma_tcp, BGP_Personality_t *pers) + { + int compute_node_count = pers->Network_Config.Xnodes*pers->Network_Config.Ynodes*pers->Network_Config.Znodes ; + int i_am_compute_node= ( pers->Network_Config.Rank != pers->Network_Config.IOnodeRank ) ; + TRACEN(k_t_init,"(>) PAGE_SHIFT=%d PAGE_SIZE=%lu", PAGE_SHIFT, PAGE_SIZE ); + bgp_fpu_register_memcpy_sysctl() ; + init_tuning(dma_tcp) ; + dma_tcp->location.coordinate[0] = pers->Network_Config.Xcoord; + dma_tcp->location.coordinate[1] = pers->Network_Config.Ycoord; + dma_tcp->location.coordinate[2] = pers->Network_Config.Zcoord; + dma_tcp->extent.coordinate[0] = pers->Network_Config.Xnodes; + dma_tcp->extent.coordinate[1] = pers->Network_Config.Ynodes; + dma_tcp->extent.coordinate[2] = pers->Network_Config.Znodes; + dma_tcp->node_count = compute_node_count ; + 
dma_tcp->node_slot_mask = (compute_node_count )-1 ; + + dma_tcp->SW_Arg = (pers->Network_Config.Xcoord << 16) + | (pers->Network_Config.Ycoord << 8) + | (pers->Network_Config.Zcoord) ; + dma_tcp->src_key = dma_tcp->location.coordinate[0]*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2] + +dma_tcp->location.coordinate[1]*dma_tcp->extent.coordinate[2] + +dma_tcp->location.coordinate[2] ; + + dma_tcp->xbits = fls(pers->Network_Config.Xnodes)-1 ; + dma_tcp->ybits = fls(pers->Network_Config.Ynodes)-1 ; + dma_tcp->zbits = fls(pers->Network_Config.Znodes)-1 ; + /* YKT BGP seems wired so that no partition less than 8x8x8 is a torus in any dimension */ + dma_tcp->is_torus_x = (pers->Network_Config.Xnodes >= 8 && pers->Network_Config.Ynodes >= 8 && pers->Network_Config.Znodes >= 8) ; + dma_tcp->is_torus_y = dma_tcp->is_torus_x ; + dma_tcp->is_torus_z = dma_tcp->is_torus_x ; + dma_tcp->block_id = pers->Network_Config.BlockID & 0x00ffffff ; + dma_tcp->i_am_compute_node = i_am_compute_node ; + TRACEN(k_t_init,"SW_Arg=0x%08x rank=%d=0x%08x src_key=0x%08x xbits=%d ybits=%d zbits=%d ", + dma_tcp->SW_Arg, pers->Network_Config.Rank, pers->Network_Config.Rank, dma_tcp->src_key, + dma_tcp->xbits,dma_tcp->ybits,dma_tcp->zbits ); + + if( 0 == dma_tcp->mtu) + { + bgp_dma_tcp_set_mtu(dma_tcp, 64996) ; + } + +#if defined(TORUS_RECEIVE_WITH_SLIH) +#else + skb_queue_head_init(&dma_tcp->skb_pool) ; + skb_queue_head_init(&dma_tcp->skb_list_free) ; +#endif + { + int core ; + for( core=0; core<k_injecting_cores; core += 1) + { + int desired_fifo ; + for(desired_fifo=0;desired_fifo<k_injecting_directions;desired_fifo+=1) + spin_lock_init(&dma_tcp->dirInjectionLock[core*k_injecting_directions+desired_fifo]) ; + } + } + +#if defined(TORUS_RECEIVE_WITH_SLIH) +#else + tasklet_schedule(&pool_filler_slih) ; +#endif + +#if defined(CONFIG_BLUEGENE_TCP) + /* Only compute nodes are torus-capable ... 
*/ + if( pers->Network_Config.Rank != pers->Network_Config.IOnodeRank ) + { + +#if defined(HAS_MISSED_INTERRUPT_TIMER) + setup_timer(&dma_tcp->torus_missed_interrupt_timer,dma_tcp_missed_interrupt,0) ; +#endif + { + int subX ; + for(subX=0;subX<DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP;subX +=1) + { + dma_tcp->injCntrSubgrps[ subX ] = subX ; + dma_tcp->recCntrSubgrps[ subX ] = subX ; + } + } + + /* register a receive function for 'unrecognised' memfifo packets */ + DMA_RecFifoRegisterRecvFunction(unknownActor, dma_tcp, 1, 0); + + dma_tcp_setup_reception_fifo(dma_tcp) ; + dma_tcp->recMap.threshold[0] = dma_tcp->receptionfifoSize/16; /* generate interrupts when anything is in the fifo */ + { + int ret __attribute__ ((unused)) = DMA_RecFifoSetMap( &dma_tcp->recMap ); /* fifo 0 will receive packets from everywhere */ + + TRACEN(k_t_init,"(=)DMA_RecFifoSetMap rc=%d", ret ); + } + /* Register functions for 'frames' style access */ + dma_tcp_frames_init(dma_tcp) ; + + + + /* set up rec fifo group */ + dma_tcp->recFifoGroup = DMA_RecFifoGetFifoGroup( k_ReceptionFifoGroup, 0, receiveCommHandler, NULL, NULL, NULL, NULL ); + + + TRACEN(k_t_init,"(=)DMA_RecFifoGetFifoGroup dma_tcp->recFifoGroup=%p", dma_tcp->recFifoGroup ); + + /* initalize rec fifo */ + { + int ret __attribute__ ((unused)) = DMA_RecFifoInitById ( dma_tcp->recFifoGroup, + recFifoId, + dma_tcp->receptionfifo, /* fifo start */ + dma_tcp->receptionfifo, /* fifo head */ + dma_tcp->receptionfifo+dma_tcp->receptionfifoSize /* fifo end */ + ); + TRACEN(k_t_init,"(=)DMA_RecFifoInitById rc=%d", ret ); + } + TRACEN(k_t_general, "(=)(I) testdma: CounterGroupAllocate"); + + { + /* Initialize injection counter group */ + int ret __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Injection, + k_InjectionCounterGroup, /* group number */ + DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP, + dma_tcp->injCntrSubgrps, + 0, /* target core for interrupts */ + NULL, + NULL, + NULL, + & dma_tcp->injCounterGroup ); + + 
TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret ); + } + memset(dma_tcp->inj_skbs,0,DMA_NUM_COUNTERS_PER_GROUP*sizeof(struct sk_buff *)) ; + + /* enable the counter */ + { + int ret; + DMA_CounterSetEnableById( & dma_tcp->injCounterGroup,0) ; + ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup,0,0xffffffff) ; + TRACEN(k_t_general, "(=)(I) testdma: DMA_CounterSetValueWideOpenById ret=%d",ret) ; + + } + +#if defined(CONFIG_WRAP_COPY_TOFROM_USER) && defined(CONFIG_BLUEGENE_DMA_MEMCPY) + /* TODO: Investigate why 'dma_memcpy' needed to be initialised before 'dma_tcp counters' */ + if( k_enable_dma_memcpy) bgp_dma_memcpyInit(dma_tcp) ; +#endif + { + /* Initialize reception counter group */ + int ret __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Reception, + k_ReceptionCounterGroup, /* group number */ + DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP, + dma_tcp->recCntrSubgrps, + 2, /* target core for interrupts */ + receiveCounterZeroHandler, + NULL, + NULL, + & dma_tcp->recCounterGroup ); + TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret ); + } + memset(dma_tcp->recCntrInUse,0,DMA_NUM_COUNTERS_PER_GROUP) ; + memset(dma_tcp->rcv_skbs,0,DMA_NUM_COUNTERS_PER_GROUP*sizeof(struct sk_buff *)) ; + dma_tcp->qtyFreeRecCounters = 64 ; + dma_tcp->scanRecCounter = 0 ; + dma_tcp->framesDisposed = 0 ; + atomic_set(&dma_tcp->framesProposed, 0 ) ; + } + dma_tcp_devfs_procfs_init(dma_tcp) ; +#endif + TRACEN(k_t_init,"(<)" ); + + + + } + +void bgp_torus_set_mtu(unsigned int mtu) + { + bgp_dma_tcp_set_mtu(&dma_tcp_state, mtu) ; + } + +int __init +dma_tcp_module_init(void) +{ + int ret = 0; + + BGP_Personality_t pers; + + bluegene_getPersonality(&pers, sizeof(pers)); + + dma_tcp_init(&dma_tcp_state, &pers) ; + + TRACEN(k_t_init, "(I)initDMA finished ret:%d",ret); + return ret; +} + +static void fix_retransmit_timeout(struct sk_buff *skb) +{ + dma_tcp_t *dma_tcp = &dma_tcp_state ; + struct sock *sk = skb->sk ; + unsigned int family=sk->sk_family ; + 
struct inet_sock *inet = inet_sk(sk) ; + struct inet_connection_sock *icsk = inet_csk(sk) ; + int is_icsk = inet->is_icsk ; + TRACEN(k_t_detail,"skb=%p sk=%p sk_family=0x%04x is_icsk=%d",skb,sk,family,is_icsk) ; + if( AF_INET == family && is_icsk ) + { + TRACEN(k_t_detail,"icsk_timeout-jiffies=%lu icsk_rto=%u",icsk->icsk_timeout-jiffies,icsk->icsk_rto) ; + if( icsk->icsk_rto < dma_tcp->tuning_min_icsk_timeout ) + { + icsk->icsk_rto=dma_tcp->tuning_min_icsk_timeout ; + } + } + +} +int bgp_dma_tcp_send_and_free( struct sk_buff *skb ) +{ + int rc ; + if( k_find_source_of_rst_flags && dma_tcp_state.tuning_diagnose_rst ) + { + struct ethhdr *eth = (struct ethhdr *)skb->data; + unsigned int h_proto = eth->h_proto ; + if( ETH_P_IP == h_proto ) + { + struct iphdr *iph = (struct iphdr *)(eth+1) ; + if(IPPROTO_TCP == iph->protocol ) + { + struct tcphdr *tcph = (struct tcphdr *)(iph+1) ; + if( tcph->rst) + { + TRACEN(k_t_request,"RST on frame to [%02x:%02x:%02x]", + eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ; + show_stack(0,0) ; /* Stack back-chain may help explain why it was sent */ + + } + } + + } + + } + fix_retransmit_timeout(skb) ; + rc = bgp_dma_tcp_send_and_free_frames(skb) ; + return rc ; +} + +/* Test if we think a socket is affected by torus congestion. 
Do this by looking to see if anything is in any software transmit FIFO */ +unsigned int bgp_torus_congestion(struct sock *sk) + { + unsigned int core ; + unsigned int direction ; + struct inet_connection_sock *icskp = inet_csk(sk) ; + struct inet_sock *inet = inet_sk(sk); + unsigned int daddr=inet->daddr ; + dma_tcp_t *dma_tcp=&dma_tcp_state ; + struct sk_buff *skb = skb_peek(&sk->sk_write_queue) ; + + if( dma_tcp->i_am_compute_node + ) + { + if( NULL == skb ) + { + TRACEN(k_t_congestion,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d q-empty-retransmit", + sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags, + daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff, + icskp->icsk_retransmits, icskp->icsk_rto + ) ; + return 0 ; + } + if( 0 == skb->len) + { + TRACEN(k_t_general,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d ack-transmit", + sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags, + daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff, + icskp->icsk_retransmits, icskp->icsk_rto + ) ; + return 0 ; + } +#if defined(USE_SKB_TO_SKB) + { + unsigned int framesProposed=atomic_read(&dma_tcp->framesProposed) ; + unsigned int framesDisposed=dma_tcp->framesDisposed ; + if( framesProposed != framesDisposed) + { + TRACEN(k_t_general, + "sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u propose=0x%08x disp=0x%08x\n", + sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags, + daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff, + framesProposed,framesDisposed + ) ; + return 1 ; + + } + } +#endif + for( core=0; core<k_injecting_cores; core += 1) + { + for( direction=0;direction<k_injecting_directions; direction+=1) + { + unsigned int fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ; + unsigned int fifo_current_tail = + (unsigned int) DMA_InjFifoGetTailById( 
&dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ; + if( fifo_current_head != fifo_current_tail) + { + TRACEN(k_t_general, + "sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u core=%d direction=%d fifo_current_head=0x%08x fifo_current_tail=0x%08x\n", + sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags, + daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff, + core,direction, + fifo_current_head,fifo_current_tail + ) ; + return 1 ; + } + } + } + } + + TRACEN(k_t_congestion,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d retransmit", + sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags, + daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff, + icskp->icsk_retransmits, icskp->icsk_rto + ) ; +/* if( icskp->icsk_rto < 300) */ +/* { */ +/* icskp->icsk_rto = icskp->icsk_rto << 1 ; */ +/* return 1 ; */ +/* } */ + return 0 ; + } + +void analyse_retransmit(struct sock *sk, struct sk_buff *skb) + { + if( skb && skb->len>0 ) /* Need a SKB,and if len=0 then it's an ACK with no data */ + { + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + unsigned int daddr=inet->daddr ; + unsigned int daddr_b0 = daddr >> 24 ; + if( daddr_b0 == 11 || daddr_b0 == 12 ) /* BGP fabric is 11.*.*.* and 12.*.*.* , only interested in those */ + { + TRACEN(k_t_congestion,"(I) sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d resending (BGP)", + sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags, + daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,icsk->icsk_retransmits, icsk->icsk_rto) ; + } + } + + } + + +/* Seem to have picked up a half-implemented feature. Dummy it. 
*/ +DMA_CounterAppSegment_t *DMA_CounterAppSegmentArray; +int DMA_CounterInitAppSegments(void) { return 0 ; } + +void show_personality(void) ; +void show_sprs(void) ; +/* Issue a diagnostic op at the DMA layer */ +void torus_diag(int op) + { + BGP_Personality_t pers; + TRACES("(>)op=%d",op) ; + + bluegene_getPersonality(&pers, sizeof(pers)); + switch(op) + { + case 0: + show_bic_regs() ; + break ; + case 1: +#if defined(CONFIG_BLUEGENE_TCP) + if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank ) + { + tasklet_schedule(&dma_tcp_slih); + } +#endif + break ; + case 2: + if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank ) + { + dumpdmadcrs(k_t_request) ; + } + break ; + case 3: +#if defined(CONFIG_BLUEGENE_TCP) + if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank ) + { + dumpRecFifoGroup(dma_tcp_state.recFifoGroup) ; + show_timestamps() ; + bgp_dma_tcp_display_pending_slots(&dma_tcp_state,dma_tcp_state.node_count) ; + } +#endif + break ; + case 4: +/* show_state() ; // kernel threads and their stacks */ + break ; + case 5: +/* show_tlbs() ; // This core's current TLBs */ +/* show_sprs() ; // Core special-purpose regs relevant to debugging */ +/* show_personality() ; // Items from the 'personality' from microcode */ + break ; + case 6: +/* #if defined(USE_SKB_TO_SKB) */ +/* bgp_dma_diag_reissue_rec_counters(&dma_tcp_state) ; */ +/* #endif */ + break ; + case 7: + #if defined(USE_SKB_TO_SKB) + dma_tcp_show_reception(&dma_tcp_state) ; + #endif + break ; + default: + ; + } + TRACES("(<)") ; + } diff --git a/drivers/net/bgp_torus/bgp_dma_tcp.h b/drivers/net/bgp_torus/bgp_dma_tcp.h new file mode 100644 index 00000000000000..3b6e60ea58a7e5 --- /dev/null +++ b/drivers/net/bgp_torus/bgp_dma_tcp.h @@ -0,0 +1,1623 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: Blue Gene low-level driver for sockets over torus + * + * + ********************************************************************/ +#ifndef __BGP_DMA_TCP_H__ +#define __BGP_DMA_TCP_H__ +#include <linux/bootmem.h> +#include <asm/div64.h> +#include <linux/timer.h> +#include <linux/bootmem.h> +#include <linux/sysctl.h> +#include <asm/atomic.h> + +#include "../bgp_network/bgp_net_traceflags.h" + +extern int bgp_dma_tcp_tracemask ; + +/* Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */ +/* #define COMPILED_TRACEMASK (0xffffffff-k_t_irqflow-k_t_irqflow_rcv-k_t_detail-k_t_fifocontents-k_t_toruspkt) */ +#define COMPILED_TRACEMASK (0xffffffff) +/* #define COMPILED_TRACEMASK (k_t_error) */ + +/* #define TORNIC_DIAGNOSE_TLB */ +#include <linux/KernelFxLog.h> +/* 'XTRACEN' would be a dummied-out trace statement */ +#define XTRACEN(i,x...) +#if defined(CONFIG_BLUEGENE_TORUS_TRACE) +#define TRACING(i) (bgp_dma_tcp_tracemask & (COMPILED_TRACEMASK & (i))) +#define TRACE(x...) KernelFxLog(bgp_dma_tcp_tracemask & k_t_general,x) +#define TRACE1(x...) KernelFxLog(bgp_dma_tcp_tracemask & k_t_lowvol,x) +#define TRACE2(x...) KernelFxLog(bgp_dma_tcp_tracemask & k_t_detail,x) +#define TRACEN(i,x...) 
KernelFxLog(bgp_dma_tcp_tracemask & (COMPILED_TRACEMASK & (i)),x) +#define TRACED(x...) KernelFxLog(1,x) +#define TRACES(x...) KernelFxLog(1,x) +#else +#define TRACING(x) 0 +#define TRACE(x...) +#define TRACE1(x...) +#define TRACE2(x...) +#define TRACEN(i,x...) +#define TRACED(x...) +#define TRACES(x...) +#endif + +#if defined(CONFIG_BLUEGENE_TCP) +#define ENABLE_FRAMES +#endif + +#define AUDIT_FRAME_HEADER + +#define KEEP_TCP_FLAG_STATS + +#define BARRIER_WITH_IOCTL +/* #define EXERCISE_WITH_IOCTL */ + +void bgp_dma_diag_report_transmission_queue(int __user * report) ; + +#if defined(BARRIER_WITH_IOCTL) +void dma_tcp_transfer_activate_sync(int sendBytes) ; +int dma_tcp_transfer_wait_sync(int demandCount) ; +void dma_tcp_transfer_clearcount(void) ; +#endif + +#if defined(EXERCISE_WITH_IOCTL) +void dma_tcp_transfer_activate(int sendBytes) ; +void dma_tcp_transfer_activate_to_one(int sendBytes, unsigned int tg) ; +void dma_tcp_transfer_activate_minicube(int sendBytes) ; +int dma_tcp_transfer_wait(int demandCount) ; +#endif + +/* Whether we want a 'watchdog' on torus arrivals */ +#define HAS_MISSED_INTERRUPT_TIMER + +/* Adaptive routing controls. */ +/* USE_ADAPTIVE_ROUTING builds a runtime capable of it; lower the value in /sys/module/bgp_torus/parameters/bgp_dma_adaptive_frame_limit to get frames send that way */ +/* INITIAL_ADAPTIVE_ROUTING sets things that way at boot (and may set params up so that attempted deterministic routing isn't actually deterministic) */ +#if defined(CONFIG_BGP_TORUS_ADAPTIVE_ROUTING) +#define USE_ADAPTIVE_ROUTING +#define RESEQUENCE_ARRIVALS +#define INITIAL_ADAPTIVE_ROUTING +#endif + +/* Support for skbuff-to-skbuff DMA */ +#define USE_SKB_TO_SKB + +/* What to use the 'dest-key' in the linkhdr for. Timestamping looks good ... 
*/ +/* #define ENABLE_LATENCY_TRACKING */ +/* #define TRACK_SEQUENCE */ +/* #define ENABLE_PROGRESS_TRACKING */ + +#define TORUS_RECEIVE_WITH_SLIH + +/* #define TORUS_WITH_SIGNATURES */ + +/* Diagnosic options */ +enum { + k_allow_interrupts_while_injecting = 0 , /* Select this for profiling injection */ + k_async_free = 1 , /* Set this to allow timer-based freeing of skbuffs where the DMA has completed */ + k_dumpmem_diagnostic = 0 , + k_scattergather_diagnostic = 0 , + k_verify_target = 0 , /* Whether to firewall-check that the target is reachable */ + k_detail_stats = 0 , /* Whether to collect detailed statistics */ + k_counter_flow_control = 1 , /* Whether to flow-control by limiting the number of reception counters allocates to a single source */ + k_force_eager_flow = 0 , /* Whether to start up with everything running 'eager' protocol (no 'rendezvous') */ + k_abbreviate_headlen = 1 , /* Whether to abbreviate the DMA transfer of 'head' in respect of the FIFO transfer */ + /* TODO: after testing that it works (on busy machines) , we should always take the 'deferral' path */ + k_allow_defer_skb_for_counter = 1, /* Whether to allow deferral allocating a 'full-size' skb until a reception counter is available */ + k_verify_ctlen = 1 , /* Whether to check that the length in the IP header matches the skbuff structure */ + k_configurable_virtual_channel = 1 /* Whether to allow runtime configuration of the virtual channel to use */ +}; + + + +enum { + numInjCounters = 1 , + recFifoId = 0 , + k_InjectionFifoGroupFrames = 0 , + k_InjectionFifoGroupMemcpy = 1 , + k_ReceptionFifoGroup = 0 , + k_InjectionCounterGroup = 0 , + k_ReceptionCounterGroup = 0 , + k_ReceptionCounterGroupMemcpy = 1 + +}; + +/* We handle fragmented skbuffs if they are presented. The receive side doesn't need to know; */ +/* the send side injects additional 'direct put' descriptors as needed. 
*/ +/* The bytes on the wire might be slightly different split between cells, but on the receive side this */ +/* is all handled by hardware. */ +enum { + k_support_scattergather = 1 /* Whether we support a 'scattergather' skbuff */ +}; + +/* At one time, we ran per-core injection, to try to minimise the locking requirement. This is now changed to */ +/* per-destination injection, to try to minimise the out-of-order delivering. */ +enum { + k_injecting_cores = 4 , + k_skb_controlling_directions = 7 , /* 'directions' where we want to free skbuffs when sent */ +#if defined(USE_SKB_TO_SKB) + k_injecting_directions = 8 , /* 6 real directions, a 'taxi' for single packet messages, and a 'propose/accept stream' */ +#else + k_injecting_directions = 7 , /* 6 real directions, a 'taxi' for single packet messages */ +#endif +}; + +/* Following section for 'packets' style */ +enum { + k_torus_skb_alignment = 16 , + k_torus_link_payload_size = 240 +}; + +enum { + k_idma_descriptor_size = 32 , + k_injection_packet_size = 240 +} ; + +enum { +/* k_concurrent_receives = 32 */ /* Number of frames-in-flight we can handle from a source (in respect of adaptive routing) */ + k_concurrent_receives = 128 /* Number of frames-in-flight we can handle from a source (in respect of adaptive routing) */ +}; + +static inline void * local_permanent_alloc(unsigned int size) + { + void *result = kmalloc(size, GFP_KERNEL) ; + TRACEN(k_t_general,"size=0x%08x result=%p",size,result) ; + return result ; + } + +/* Using these when we are statically allocating buffers, or using alloc_bootmem_low */ +enum { + k_idma_descriptor_count = 16384, /* Design choice */ + k_injection_packet_count = 16384 /* Matches IDMA descriptor count, to keep tagging simple */ + /* k_injection_packet_count = (1<<22)/k_injection_packet_size // 4 megabytes of 'runway' */ +}; + +enum { + k_memcpy_idma_descriptor_count = 64, /* Design choice */ +}; + +typedef struct { + char 
buffer[k_idma_descriptor_size*k_memcpy_idma_descriptor_count] ; +} memcpy_packet_injection_memoryfifo_t __attribute__((aligned(16))); + +typedef struct { + char buffer[k_idma_descriptor_size*k_idma_descriptor_count] ; +} packet_injection_memoryfifo_t __attribute__((aligned(16))); + +typedef struct { + int tailx[k_injection_packet_count] ; +} packet_injection_tag_t ; + +typedef struct { + struct sk_buff * skb_array[k_injection_packet_count] ; +} packet_skb_array_t ; + +static inline packet_injection_memoryfifo_t * allocate_packet_injection_memoryfifo(unsigned int core, unsigned int direction) + { + packet_injection_memoryfifo_t * rc = local_permanent_alloc(sizeof(packet_injection_memoryfifo_t)) ; + BUG_ON(rc == NULL) ; + XTRACEN(k_t_init,"allocate_packet_injection_memoryfifo core=%d direction=%d rc=%p", + core, direction, rc ) ; + BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ; /* Need 32-byte alignment */ + return rc ; + } + +static inline memcpy_packet_injection_memoryfifo_t * allocate_memcpy_packet_injection_memoryfifo(unsigned int core) + { + memcpy_packet_injection_memoryfifo_t * rc = local_permanent_alloc(sizeof(memcpy_packet_injection_memoryfifo_t)) ; + BUG_ON(rc == NULL) ; + TRACEN(k_t_general,"allocate_memcpy_packet_injection_memoryfifo core=%d rc=%p", + core, rc ) ; + BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ; /* Need 32-byte alignment */ + return rc ; + } + +static inline packet_injection_tag_t * allocate_packet_injection_tag(unsigned int core, unsigned int direction) + { + packet_injection_tag_t * rc = kmalloc(sizeof(packet_injection_tag_t),GFP_KERNEL) ; + BUG_ON(rc == NULL) ; + XTRACEN(k_t_init,"allocate_packet_injection_tag core=%d direction=%d rc=%p", + core, direction, rc ) ; + BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ; /* Need 32-byte alignment */ + return rc ; + } + +static inline packet_skb_array_t * allocate_packet_skb_array(unsigned int core, unsigned int direction) + { + packet_skb_array_t * rc = 
kmalloc(sizeof(packet_skb_array_t),GFP_KERNEL) ; + BUG_ON(rc == NULL) ; + XTRACEN(k_t_init,"allocate_skb_array core=%d direction=%d rc=%p", + core, direction, rc ) ; + memset(rc,0,sizeof(packet_skb_array_t)) ; + return rc ; + } + +enum { + k_idma_frame_count = 16384 /* Design choice */ +}; + +typedef struct { +#if defined(ENABLE_PACKETS) || defined(ENABLE_FRAMES) + packet_injection_memoryfifo_t * idma_fifo ; + packet_injection_tag_t * idma_tag ; + packet_skb_array_t * idma_skb_array ; + unsigned int fifo_head_index ; + unsigned int fifo_tail_index ; + unsigned int buffer_head_index ; + unsigned int buffer_tail_index ; + unsigned int fifo_initial_head ; + unsigned int packets_injected_count ; + unsigned int injection_vacant ; + unsigned int injection_high_watermark ; +#endif +#if defined(ENABLE_FRAMES) + struct sk_buff_head frame_queue ; +#endif +} idma_direction_t ; + +static inline void allocate_idma_direction(idma_direction_t * idma_direction,unsigned int core, unsigned int direction) + { +#if defined(ENABLE_PACKETS) || defined(ENABLE_FRAMES) + idma_direction->idma_fifo = allocate_packet_injection_memoryfifo(core, direction) ; + idma_direction->idma_tag = allocate_packet_injection_tag(core,direction) ; + idma_direction->idma_skb_array = allocate_packet_skb_array(core,direction) ; + idma_direction->fifo_head_index = 0 ; + idma_direction->fifo_tail_index = 0 ; + idma_direction->buffer_head_index = 0 ; + idma_direction->buffer_tail_index = 0 ; + idma_direction->injection_vacant = 0 ; + idma_direction->injection_high_watermark = 0 ; + idma_direction->packets_injected_count = 0 ; +#endif +#if defined(ENABLE_FRAMES) + skb_queue_head_init(&idma_direction->frame_queue) ; +#endif + } + +typedef struct { + idma_direction_t idma_direction[k_injecting_directions] ; + memcpy_packet_injection_memoryfifo_t *memcpy_packet_fifo ; + unsigned int memcpy_packet_fifo_head_index ; + unsigned int memcpy_packet_fifo_tail_index ; + unsigned int memcpy_fifo_initial_head ; +} idma_core_t ; 
+ +static inline void allocate_idma_core(idma_core_t * idma_core,unsigned int core) + { + int direction ; + for( direction=0 ; direction<k_injecting_directions;direction+=1 ) + { + allocate_idma_direction(idma_core->idma_direction+direction, core, direction) ; + } + idma_core->memcpy_packet_fifo=allocate_memcpy_packet_injection_memoryfifo(core) ; + } + +typedef struct { + idma_core_t idma_core[k_injecting_cores] ; +} idma_t ; + +static inline void allocate_idma(idma_t * idma) + { + int core ; + for( core=0 ; core<k_injecting_cores;core+=1 ) + { + allocate_idma_core(idma->idma_core+core, core) ; + } + } + +/* 'per-slot' structures for demultiplexing received torus messages. */ +/* we are no longer running 1 slot per possubly-sending core, i.e. 4 per node in the partition; now running 1 per node */ +/* Get/set methods because for 'large' machines we might need bigger tables than can be kmalloced in one go */ +#if defined(ENABLE_LATENCY_TRACKING) + +typedef struct { + unsigned long long s1 ; + unsigned long long sx ; + unsigned long long sxx ; + unsigned int xmin ; + unsigned int xmax ; +} rcv_statistic_t ; + +static void rcv_statistic_clear(rcv_statistic_t *t) + { + t->s1 = 0; + t->sx = 0; + t->sxx = 0 ; + t->xmin = 0xffffffff ; + t->xmax = 0 ; + } +static void rcv_statistic_observe(rcv_statistic_t *t, unsigned int x) + { + unsigned long long ullx = x ; + unsigned long long ullxx = ullx*ullx ; + t->s1 += 1 ; + t->sx += x ; + t->sxx += ullxx ; + if( x<t->xmin ) t->xmin=x ; + if( x>t->xmax ) t->xmax=x ; + } +static unsigned int rcv_statistic_mean(rcv_statistic_t *t) + { + unsigned long long s1=t->s1 ; + unsigned long long sx=t->sx ; + unsigned long long rc = sx ; + do_div(rc,(unsigned int)s1) ; + TRACEN(k_t_detail,"sx=0x%08x%08x s1=0x%08x%08x mean=%u", + (unsigned int)(sx>>32),(unsigned int)sx, + (unsigned int)(s1>>32),(unsigned int)s1,(unsigned int)rc) ; + return (unsigned int)rc ; + } +static unsigned int rcv_statistic_variance(rcv_statistic_t *t, unsigned int m) + { 
+ unsigned long long s1=t->s1 ; + unsigned long long sx=t->sx ; + unsigned long long sxx=t->sxx ; + unsigned long long mm=m ; + unsigned long long vv = sxx - mm*mm ; + unsigned long long rc=vv ; + do_div(rc,(unsigned int)s1) ; + TRACEN(k_t_detail,"sxx=0x%08x%08x sx=0x%08x%08x s1=0x%08x%08x mm=0x%08x%08x vv=0x%08x%08x variance=%u", + (unsigned int)(sxx>>32),(unsigned int)sxx, + (unsigned int)(sx>>32),(unsigned int)sx, + (unsigned int)(s1>>32),(unsigned int)s1, + (unsigned int)(mm>>32),(unsigned int)mm, + (unsigned int)(vv>>32),(unsigned int)vv, + (unsigned int)rc) ; + return (unsigned int)rc ; + } +#endif +/* TODO: Can this be condensed ? Should be a 'char * payload' and a 'char * payload_alert', down to 8 bytes */ +/* or could even be a 28-bit address (since we know 16-byte alignment) and a 4-bit count so we treat things */ +/* in more detail every 16 packets or when the frame is done if sooner */ +/* TODO: also: maybe the injector should flag the last packet of a frame with a different function ? 
*/ +typedef struct { + unsigned char * payload ; + unsigned char * payload_alert ; + unsigned int expect ; + int lastcell ; + unsigned int proposals_active ; + struct sk_buff_head proposals_pending_flow ; +#if defined(USE_ADAPTIVE_ROUTING) + struct sk_buff * skb_per_conn[k_concurrent_receives] ; +#if defined(RESEQUENCE_ARRIVALS) + struct sk_buff * skb_pending_resequence[k_concurrent_receives] ; + unsigned int conn_id_pending_delivery ; +#endif +#endif +#if defined(ENABLE_LATENCY_TRACKING) + rcv_statistic_t latency ; + unsigned int basetime ; +#endif +#if defined(ENABLE_PROGRESS_TRACKING) + unsigned long long timestamp ; +#endif +} rcv_per_slot_t ; + +typedef struct { + unsigned int partner_ip_address ; + unsigned int partner_xyz ; +} learned_address_entry ; + +typedef struct { + rcv_per_slot_t * rcv_per_slot_vector ; + struct sk_buff ** skb_per_slot_vector ; +} rcv_t ; + +static inline char * get_rcv_payload(rcv_t *rcv, unsigned int slot_index) + { + return rcv->rcv_per_slot_vector[slot_index].payload ; + } + +static inline void set_rcv_payload(rcv_t *rcv, unsigned int slot_index, char * payload ) + { + rcv->rcv_per_slot_vector[slot_index].payload = payload ; + } + +static inline unsigned int get_proposals_active(rcv_t *rcv, unsigned int slot_index) + { + return rcv->rcv_per_slot_vector[slot_index].proposals_active ; + } + +static inline void set_proposals_active(rcv_t *rcv, unsigned int slot_index, unsigned int proposals_active ) + { + rcv->rcv_per_slot_vector[slot_index].proposals_active = proposals_active ; + } + +static inline char * get_rcv_payload_alert(rcv_t *rcv, unsigned int slot_index) + { + return rcv->rcv_per_slot_vector[slot_index].payload_alert ; + } + +static inline void set_rcv_payload_alert(rcv_t *rcv, unsigned int slot_index, char * payload_alert ) + { + rcv->rcv_per_slot_vector[slot_index].payload_alert = payload_alert ; + } + +static inline unsigned int get_rcv_expect(rcv_t *rcv, unsigned int slot_index) + { + return 
rcv->rcv_per_slot_vector[slot_index].expect ; + } + +static inline void set_rcv_expect(rcv_t *rcv, unsigned int slot_index, unsigned int expect) + { + rcv->rcv_per_slot_vector[slot_index].expect = expect ; + } + +static inline int get_rcv_lastcell(rcv_t *rcv, unsigned int slot_index) + { + return rcv->rcv_per_slot_vector[slot_index].lastcell ; + } + +static inline void set_rcv_lastcell(rcv_t *rcv, unsigned int slot_index, int lastcell) + { + rcv->rcv_per_slot_vector[slot_index].lastcell = lastcell ; + } + +static inline struct sk_buff * get_rcv_skb(rcv_t *rcv, unsigned int slot_index) + { + return rcv->skb_per_slot_vector[slot_index] ; + } + +static inline void set_rcv_skb(rcv_t *rcv, unsigned int slot_index, struct sk_buff * skb) + { + rcv->skb_per_slot_vector[slot_index] = skb ; + } + +static inline void init_pending_flow(rcv_t *rcv, unsigned int slot_index) +{ + skb_queue_head_init(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ; +} + +static inline void enq_pending_flow(rcv_t *rcv, unsigned int slot_index, struct sk_buff * skb) +{ + skb_queue_tail(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow,skb) ; +} + +static inline struct sk_buff * deq_pending_flow(rcv_t *rcv, unsigned int slot_index) +{ + return skb_dequeue(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ; +} + +static inline unsigned int count_pending_flow(rcv_t *rcv, unsigned int slot_index) +{ + return skb_queue_len(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ; +} + +#if defined(USE_ADAPTIVE_ROUTING) +static inline struct sk_buff * get_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id) +{ + return rcv->rcv_per_slot_vector[slot_index].skb_per_conn[conn_id & (k_concurrent_receives-1)] ; +} + +static void set_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb) __attribute__((unused)) ; +static void set_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int 
conn_id, struct sk_buff * skb) +{ + rcv->rcv_per_slot_vector[slot_index].skb_per_conn[conn_id & (k_concurrent_receives-1)] = skb ; +} +#if defined(RESEQUENCE_ARRIVALS) + static inline struct sk_buff * get_rcv_skb_pending_resequence(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id) + { + return rcv->rcv_per_slot_vector[slot_index].skb_pending_resequence[conn_id & (k_concurrent_receives-1)] ; + } + static inline void set_rcv_skb_pending_resequence(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb) + { + rcv->rcv_per_slot_vector[slot_index].skb_pending_resequence[conn_id & (k_concurrent_receives-1)] = skb; + } + static inline int get_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index) + { + return rcv->rcv_per_slot_vector[slot_index].conn_id_pending_delivery ; + } + static void set_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id) __attribute__((unused)) ; + static void set_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id) + { + rcv->rcv_per_slot_vector[slot_index].conn_id_pending_delivery=conn_id ; + } + +#endif + +#endif + +static inline unsigned long long get_timestamp(rcv_t *rcv, unsigned int slot_index) + { +#if defined(ENABLE_PROGRESS_TRACKING) + return rcv->rcv_per_slot_vector[slot_index].timestamp ; +#else + return 0 ; +#endif + } + +static inline void set_timestamp(rcv_t *rcv, unsigned int slot_index, unsigned long long timestamp) + { +#if defined(ENABLE_PROGRESS_TRACKING) + rcv->rcv_per_slot_vector[slot_index].timestamp=timestamp ; +#endif + } + +enum { + k_slots_per_node = 1 , /* down from 4 ... 
*/ + k_connids_per_node = 128 /* Number of conn-ids we track per node on the sending side */ +}; +static inline void allocate_rcv(rcv_t *rcv, unsigned int node_count) + { + rcv->rcv_per_slot_vector = kmalloc(k_slots_per_node*node_count*sizeof(rcv_per_slot_t), GFP_KERNEL) ; + BUG_ON(NULL == rcv->rcv_per_slot_vector) ; + memset(rcv->rcv_per_slot_vector,0,k_slots_per_node*node_count*sizeof(rcv_per_slot_t)) ; + rcv->skb_per_slot_vector = kmalloc(k_slots_per_node*node_count*sizeof(struct sk_buff *), GFP_KERNEL) ; + BUG_ON(NULL == rcv->skb_per_slot_vector) ; + BUG_ON(NULL == rcv->skb_per_slot_vector) ; + { + unsigned int slot ; + for(slot=0;slot<node_count;slot+=1) + { + init_pending_flow(rcv,slot) ; + } + } + } + +#if defined(USE_ADAPTIVE_ROUTING) + +extern ulong bgp_dma_adaptive_frame_limit ; + +typedef struct { + atomic_t * conn_id ; +#if defined(USE_SKB_TO_SKB) + struct sk_buff **skb ; +#endif +} tx_t ; + +static inline void init_tx_conn_id(tx_t *tx, unsigned int slot_index) +{ + atomic_set(tx->conn_id+slot_index,0xffffffff) ; +} + +static inline void allocate_tx(tx_t *tx, unsigned int node_count) + { + tx->conn_id = kmalloc(k_slots_per_node*node_count*sizeof(atomic_t), GFP_KERNEL) ; + BUG_ON(NULL == tx->conn_id) ; + { + int x ; + for(x=0;x<node_count;x+=1) + { + init_tx_conn_id(tx,x) ; + } + } +#if defined(USE_SKB_TO_SKB) + tx->skb = kmalloc(k_connids_per_node*node_count*sizeof(struct sk_buff *),GFP_KERNEL) ; +#endif + BUG_ON(NULL == tx->skb) ; + memset(tx->skb,0,k_connids_per_node*node_count*sizeof(struct sk_buff *)) ; + } + +static inline unsigned int take_tx_conn_id(tx_t *tx, unsigned int slot_index) +{ + unsigned int rc= atomic_inc_return(tx->conn_id+slot_index) ; + TRACEN(k_t_general,"slot_index=0x%08x conn_id=0x%08x",slot_index,rc) ; + return rc ; +} +#if defined(USE_SKB_TO_SKB) +static inline struct sk_buff * get_tx_skb(tx_t *tx, unsigned int slot_index, unsigned int conn_id) +{ + return tx->skb[slot_index*k_connids_per_node+(conn_id & 
(k_connids_per_node-1))] ; +} +static inline void set_tx_skb(tx_t *tx, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb) +{ + tx->skb[slot_index*k_connids_per_node+(conn_id & (k_connids_per_node-1))] = skb ; +} + +#endif + +#endif + +/* End of 'packets' style section */ +enum { + k_desired_reception_memory_fifo_size = +#if defined(CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT) + 1 << (CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT) +#else + 1 << 22 /* Try 4MB as a static region, if not set externally */ +/* 1 << 20 // Try 1MB as a static region, if not set externally */ +#endif +} ; +enum { + k_metadata_injection_memory_fifo_size = 4096 , + k_bulk_injection_memory_fifo_size = 4096 +}; + +typedef struct { + char buffer[k_metadata_injection_memory_fifo_size] ; +} metadata_injection_memoryfifo_t ; + +typedef struct { + char buffer[k_bulk_injection_memory_fifo_size] ; +} bulk_injection_memoryfifo_t ; + + +#if defined(BARRIER_WITH_IOCTL) +enum { + k_diag_target_data_size = 1<<20 , /* Aim up to 1MB ... */ + k_diag_packet_count = k_diag_target_data_size/k_injection_packet_size , /* Rounding down for packets ... 
*/ +}; +typedef struct { + char buffer[k_diag_target_data_size] ; +} diag_block_buffer_t ; + +static inline diag_block_buffer_t * allocate_diag_block_buffer(void) +{ + diag_block_buffer_t * result = kmalloc(k_diag_target_data_size,GFP_KERNEL) ; + BUG_ON(NULL == result) ; + return result ; +} + +static inline unsigned int * allocate_shuffle_vector(unsigned int xe, unsigned int ye, unsigned int ze) +{ + unsigned int * result = kmalloc(xe*ye*ze*sizeof(unsigned int),GFP_KERNEL) ; + BUG_ON(NULL == result) ; + return result ; +} +#endif + + +enum { + k_Dimensionality = 3 +}; + +typedef struct { + unsigned char coordinate[k_Dimensionality] ; +} torusLocation_t ; + +typedef enum { + k_send_propose_rpc , + k_act_propose_rpc , + k_send_accept_rpc , + k_act_accept_rpc , + + k_defer_accept_rpc_counters , + k_defer_accept_rpc_nodeflow , + k_send_eager , + k_receive_eager , + + k_no_reception_counter , + k_parked , + k_scattergather , + k_receive_incomplete , + + k_headlength_zero , + k_fraglength_zero , + k_accept_audit_fail , + k_receive_audit_fail , + + k_counted_length_mismatch , + k_reordered , + k_queue_filled_propose_fifo , + + + k_flow_counters +} flowpoint_e ; + +#if defined(CONFIG_BGP_STATISTICS) +extern int reception_fifo_histogram[33] ; +extern int reception_hi_watermark ; +extern int rtt_histogram[33] ; +extern int transit_histogram[33] ; +#endif + +enum { + k_pending_rcv_skb_classes = 6 +}; +typedef struct { + struct sk_buff_head pending_rcv_skbs ; /* List of sk_buffs awaiting a reception counter */ + unsigned int outstanding_counters ; /* Number of counters awaiting completion in this direction */ +} bgp_dma_balancer_direction ; +typedef struct { + bgp_dma_balancer_direction b[k_pending_rcv_skb_classes] ; +} bgp_dma_balancer ; +typedef struct { + torusLocation_t location ; + torusLocation_t extent ; + /* Number of bits required to represent a node in each torus dimension */ + unsigned int xbits ; + unsigned int ybits ; + unsigned int zbits ; + + DMA_RecFifoGroup_t 
* recFifoGroup; + rcv_t rcvdemux ; /* Reception demultiplex */ +#if defined(USE_ADAPTIVE_ROUTING) + tx_t tx_mux ; /* Transmission multiplexer (conn_ids by slot) */ +#endif + unsigned int node_count ; /* Total number of nodes in the block */ + unsigned int node_slot_mask ; /* ((node_count << 2)-1) , for bit-masking to firewall check received data */ +#ifdef ENABLE_PACKETS + DMA_InjFifoGroup_t injFifoGroupPackets; + int injFifoPacketsIds[ k_injecting_cores*k_injecting_directions ]; + int proto_issue_packets ; + + /* End of packets-style interface */ +#endif + idma_t idma ; /* Injection DMA buffering */ +#ifdef ENABLE_PACKETS + unsigned short int injFifoPacketsPri[ k_injecting_cores*k_injecting_directions ] ; + unsigned short int injFifoPacketsLoc[ k_injecting_cores*k_injecting_directions ] ; + unsigned char injFifoPacketsMap[ k_injecting_cores*k_injecting_directions ] ; +#endif + struct sk_buff_head inj_queue[k_injecting_directions] ; /* Lists of skb's queued because DMA buffers have no space */ + unsigned int packets_received_count ; + struct timer_list runway_check_timer ; + struct timer_list transmission_free_skb_timer ; +#if defined(HAS_MISSED_INTERRUPT_TIMER) + struct timer_list torus_missed_interrupt_timer ; +#endif +#ifdef ENABLE_FRAMES + DMA_InjFifoGroup_t injFifoGroupFrames; + int injFifoFramesIds[ k_injecting_cores*k_injecting_directions ]; + int proto_issue_frames_single ; +#if defined(USE_ADAPTIVE_ROUTING) + int proto_issue_frames_adaptive ; +#endif +#if defined(USE_SKB_TO_SKB) + int proto_transfer_propose ; + int eager_limit ; /* frames larger than this to be sent with skb-to-skb DMA */ + int flow_counter[k_flow_counters] ; +#endif +#if defined(BARRIER_WITH_IOCTL) + int proto_issue_diag_sync ; + diag_block_buffer_t * diag_block_buffer ; + unsigned int * shuffle_vector ; + unsigned int shuffle_seed ; + int prev_tbl ; + unsigned int timing_histogram_buckets[33] ; +#endif + unsigned short int injFifoFramesPri[ k_injecting_cores*k_injecting_directions ] ; + 
unsigned short int injFifoFramesLoc[ k_injecting_cores*k_injecting_directions ] ; + unsigned char injFifoFramesMap[ k_injecting_cores*k_injecting_directions ] ; +#endif + + DMA_CounterGroup_t injCounterGroup; + DMA_CounterGroup_t recCounterGroup; + + void * receptionfifo ; + unsigned int receptionfifoSize ; + + unsigned int mtu ; + unsigned int max_packets_per_frame ; + + DMA_RecFifoMap_t recMap; /* rec fifo map structure */ + + + +#if defined(USE_SKB_TO_SKB) + int injCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ; + int recCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ; + char recCntrInUse [ DMA_NUM_COUNTERS_PER_GROUP ] ; + int qtyFreeRecCounters ; + int scanRecCounter ; + struct sk_buff * inj_skbs[DMA_NUM_COUNTERS_PER_GROUP] ; + struct sk_buff * rcv_skbs[DMA_NUM_COUNTERS_PER_GROUP] ; + unsigned int slot_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ; + unsigned char conn_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ; + int rcv_timestamp[DMA_NUM_COUNTERS_PER_GROUP] ; + int rcv_checked_time ; + bgp_dma_balancer balancer ; + atomic_t framesProposed ; + unsigned int framesDisposed ; +#endif + unsigned short int memcpyInjFifoFramesPri[ k_injecting_cores ] ; + unsigned short int memcpyInjFifoFramesLoc[ k_injecting_cores ] ; + unsigned char memcpyInjFifoFramesMap[ k_injecting_cores ] ; + DMA_InjFifoGroup_t memcpyInjFifoGroupFrames; + int memcpyInjFifoFramesIds[ k_injecting_cores ]; + DMA_CounterGroup_t memcpyRecCounterGroup; + int memcpyRecCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ; + + int proto_diagnose ; /* 'diagnose' frame to software reception FIFO */ + + unsigned int SW_Arg ; /* / 'Software Arg', we send our {x,y,z} */ + unsigned int src_key ; /* 'source key', we send rank */ + + + spinlock_t dirInjectionLock[k_injecting_cores*k_injecting_directions] ; /* serialise access to injection FIFOs */ + + void * previousActor ; /* FIFO address of previous Actor, for detecting replays */ + + + /* sysctl entries */ + struct ctl_table_header * sysctl_table_header ; +/* 
Statistics */ + + struct net_device_stats * device_stats ; + unsigned int count_no_skbuff ; + unsigned int tx_by_core[4] ; + unsigned int tx_in_use_count[k_injecting_directions+1] ; +#if defined(KEEP_TCP_FLAG_STATS) + unsigned int tcp_received_flag_count[8] ; +#endif +/* Tuning parameters */ + int tuning_num_packets ; /* = 1 , number of packets to process per poll call */ + int tuning_num_empty_passes ; /* = 512 , number of times to spin before returning */ + int tuning_non_empty_poll_delay ; /* = 850 , number of cycles to spin between looks at the FIFO */ + int tuning_poll_after_enabling ; /* = 1 , whether to poll again after enabling for interrupts */ + int tuning_run_handler_on_hwi ; /* = 1 , whether to run the hander on FIFO hardware interrupts (as well as rDMA ones) */ + int tuning_clearthresh_slih ; /* = 1 , whether to clear the 'threshold crossed' bit in the slih */ + int tuning_clearthresh_flih ; /* = 1 , whether to clear the 'threshold crossed' bit in the flih */ + int tuning_disable_in_dcr ; /* = 1, whether to toggle the DCR interrupt enable/disable */ + int tuning_injection_hashmask ; /* = 3, whether to mask down the number of injection FIFOs in use per direction */ + + int tuning_recfifo_threshold ; /* for moving to/from DCR */ + int tuning_dcr_c8b ; /* for moving to/from DCR */ + int tuning_enable_hwfifo ; /* For registering/unregistering 'hardware FIFO' interrupts */ + + int tuning_exploit_reversepropose ; /* Whether to try the 'reverse propose' protocol */ + int tuning_counters_per_source ; /* How many reception counters to commit per source node */ + int tuning_defer_skb_until_counter ; /* Whether to defer sk_buff allocation until a reception counter is available */ + int tuning_deliver_eagerly ; /* Whether to skip the 'resequence arrivals' step */ + int tuning_diagnose_rst ; /* Whether to cut trace records when being asked to send a TCP segment with a 'rst' */ + + int tuning_select_fifo_algorithm ; /* Which FIFO selection algorithm to use 
(head-of-line block minimisation) */ + + int tuning_min_icsk_timeout ; /* What to push ICSK retransmit timeout up to if we find it low */ + + int tuning_virtual_channel ; /* Which virtual channel to use (i.e. whether to force deterministic routing) */ + + unsigned int block_id ; + unsigned char i_am_compute_node ; + unsigned char bluegene_tcp_is_built ; + unsigned char is_torus_x ; + unsigned char is_torus_y ; + unsigned char is_torus_z ; + unsigned char last_queue_picked ; +#if defined(CONFIG_BGP_STATISTICS) + unsigned int resequence_histogram[k_concurrent_receives] ; + unsigned long long bytes_sent ; + unsigned long long bytes_received ; +#endif +} dma_tcp_t ; + +typedef enum { + k_VC_ordering = DMA_PACKET_VC_BN , /* virtual channel to use when we want to order things, 'Bubble Normal' */ + k_VC_anyway = DMA_PACKET_VC_D0 /* virtual channel to use otherwise ... 'Dynamic 0' */ +} VC_e ; + +static inline unsigned int virtual_channel(dma_tcp_t *dma_tcp, VC_e channel_hint) +{ + return k_configurable_virtual_channel ? 
dma_tcp->tuning_virtual_channel : channel_hint ; +} + +static inline void instrument_flow(dma_tcp_t *dma_tcp,flowpoint_e flowpoint) +{ + dma_tcp->flow_counter[flowpoint] += 1 ; +} + +static inline unsigned int flow_count(dma_tcp_t *dma_tcp,flowpoint_e flowpoint) +{ + return dma_tcp->flow_counter[flowpoint] ; +} + +extern dma_tcp_t dma_tcp_state ; + +void bgp_dma_tcp_display_pending_slots(dma_tcp_t * dma_tcp, unsigned int nodecount ) ; +void bgp_dma_diag_reissue_rec_counters(dma_tcp_t *dma_tcp) ; + +void bgp_dma_tcp_empty_fifo_callback(void) ; + +extern void bluegene_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ; +extern void bluegene_bic_disable_irq(unsigned int irq) ; + +int bgnet_receive_torus(struct sk_buff * skb) ; +int bgtornet_receive_torus(struct sk_buff * skb) ; +struct net_device_stats *bgtornet_stats(void) ; + +void bgtornet_rx_schedule(void) ; + + +static inline int DMA_CounterSetValueWideOpen( + DMA_Counter_t *c_sw, + unsigned int value + ) +{ + unsigned int pa_base=0, pa_max=0xffffffff; + SPI_assert( c_sw != NULL ); + c_sw->pa_base = pa_base; + c_sw->pa_max = pa_max; + + /* + * Write the value, base, and max to the hardware counter + */ + DMA_CounterSetValueBaseMaxHw(c_sw->counter_hw_ptr, + value, + pa_base, + pa_max); + + return (0); +} + +static inline int DMA_CounterSetValueWideOpenById( + DMA_CounterGroup_t *cg_ptr, + int counter_id , + unsigned int value + ) + { + int rc; + + SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) ); + SPI_assert( cg_ptr != NULL ); + SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] & + _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 ); + + rc = DMA_CounterSetValueWideOpen( &cg_ptr->counter[counter_id], value ) ; + + /* Note: it is assumed that the above function call performs an MBAR */ + + return rc; + + } + +/* Choose a transmission FIFO for a stream. 
This is 'approximately' the deterministic routing algorithm */ +/* (I think it is 'exactly' the deterministic routing algorithm, with the possible exception of what the hardware will do */ +/* if you send a packet to something half-way-round in one of the torus dimensions) */ +/* Return -1 if it is an attempted 'self-send'; this has to be done as a local DMA or a memcpy, not as a torus op */ +static int select_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x, unsigned int y, unsigned int z) __attribute__ ((unused)) ; +static inline int sign_extend(int d, unsigned int bb) +{ + return (d << (32-bb)) >> (32-bb) ; +} +static inline int resolve_direction(int d, unsigned int is_torus, unsigned int bb, int v0, int v1) +{ + if( is_torus) d = sign_extend(d,bb) ; + return (d<0) ? v1 : v0 ; +} +static int select_transmission_fifo_v(dma_tcp_t *dma_tcp, unsigned int x0,unsigned int x, unsigned int y0,unsigned int y, unsigned int z0,unsigned int z) + { + switch(dma_tcp->tuning_select_fifo_algorithm) + { + case 0: + case 1: + { + int dx = x0-x ; + int dy = y0-y ; + int dz = z0-z ; + if( dx != 0 ) return resolve_direction(dx, dma_tcp->is_torus_x,dma_tcp->xbits, 1, 0) ; + if( dy != 0 ) return resolve_direction(dy, dma_tcp->is_torus_y,dma_tcp->ybits, 3, 2) ; + return resolve_direction(dz,dma_tcp->is_torus_z,dma_tcp->zbits, 5, 4) ; + } + default: + /* rank modulo 6 ... 
*/ + return ((x<<(dma_tcp->ybits+dma_tcp->zbits)) | (y<<(dma_tcp->zbits)) | (z)) % 6 ; + + } + } + +static int select_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x, unsigned int y, unsigned int z) +{ + return select_transmission_fifo_v(dma_tcp,dma_tcp->location.coordinate[0],x,dma_tcp->location.coordinate[1],y,dma_tcp->location.coordinate[2],z) ; +} + +/* Report the transmission FIFO that a remote node will use to reach this node */ +static int report_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x0, unsigned int y0, unsigned int z0) __attribute__ ((unused)) ; +static int report_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x0, unsigned int y0, unsigned int z0) +{ + return select_transmission_fifo_v(dma_tcp,x0,dma_tcp->location.coordinate[0],y0,dma_tcp->location.coordinate[1],z0,dma_tcp->location.coordinate[2]) ; +} + + + +int handleSocketsRecvMsgActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) ; +int handleSocketsRecvMsgCompletedActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) ; +int handleSocketsBufferActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) ; + + +#ifdef ENABLE_PACKETS +void dma_tcp_packets_init(dma_tcp_t *dma_tcp) ; +int bgp_dma_tcp_send_and_free_packets( struct sk_buff *skb + ) ; +void dma_tcp_packets_show_counts(dma_tcp_t *dma_tcp) ; + +#endif +#ifdef ENABLE_FRAMES +void dma_tcp_frames_init(dma_tcp_t *dma_tcp) ; +int bgp_dma_tcp_send_and_free_frames( struct sk_buff *skb + ) ; +#endif + +/* ethem codings are ... */ +/* 0 : run things on the tree */ +/* 1 : run things with 'actors' and DMA to/from SKBUFFs */ +/* 2 : run things with 'messages' between memory FIFOs */ +/* 3 : send both (1) and (2), for bringup. 
*/ +/* until it's working correctly, we will deliver the '1' eth frames and discard the '2' eth frames at the receiver. */ +/* Additionally we can set a '4' bit, which will send packets over the tree; */ +/* so we could set '6' and get a working tree drive, and 'messages' flows to go through the motions on a prototype driver without any 'actors' flows */ + +extern int bgp_dma_ethem ; + +/********************************************************************** + * DCR access wrapper + **********************************************************************/ + +static inline uint32_t mfdcrx(uint32_t dcrn) +{ + uint32_t value; + asm volatile ("mfdcrx %0,%1": "=r" (value) : "r" (dcrn) : "memory"); + return value; +} + +static inline void mtdcrx(uint32_t dcrn, uint32_t value) +{ + asm volatile("mtdcrx %0,%1": :"r" (dcrn), "r" (value) : "memory"); +} + + +static void dumpdmadcrs(unsigned int tracelevel) __attribute__ ((unused)) ; +static void dumpdmadcrs(unsigned int tracelevel) + { + int x ; + for(x=0xd00; x<=0xdff ; x += 8 ) + { + int d0 __attribute__ ((unused)) = mfdcrx(x) ; + int d1 __attribute__ ((unused)) = mfdcrx(x+1) ; + int d2 __attribute__ ((unused)) = mfdcrx(x+2) ; + int d3 __attribute__ ((unused)) = mfdcrx(x+3) ; + int d4 __attribute__ ((unused)) = mfdcrx(x+4) ; + int d5 __attribute__ ((unused)) = mfdcrx(x+5) ; + int d6 __attribute__ ((unused)) = mfdcrx(x+6) ; + int d7 __attribute__ ((unused)) = mfdcrx(x+7) ; + TRACEN(tracelevel,"Torus DMA dcrs 0x%04x %08x %08x %08x %08x %08x %08x %08x %08x", + x,d0,d1,d2,d3,d4,d5,d6,d7 + ) ; + } + } + +static void dumptorusdcrs(void) __attribute__ ((unused)) ; +static void dumptorusdcrs(void) + { + int x ; + for(x=0xc80; x<=0xc8f ; x += 8 ) + { + int d0 __attribute__ ((unused)) = mfdcrx(x) ; + int d1 __attribute__ ((unused)) = mfdcrx(x+1) ; + int d2 __attribute__ ((unused)) = mfdcrx(x+2) ; + int d3 __attribute__ ((unused)) = mfdcrx(x+3) ; + int d4 __attribute__ ((unused)) = mfdcrx(x+4) ; + int d5 __attribute__ ((unused)) = 
mfdcrx(x+5) ; + int d6 __attribute__ ((unused)) = mfdcrx(x+6) ; + int d7 __attribute__ ((unused)) = mfdcrx(x+7) ; + TRACEN(k_t_request,"Torus control dcrs 0x%04x %08x %08x %08x %08x %08x %08x %08x %08x\n", + x,d0,d1,d2,d3,d4,d5,d6,d7 + ) ; + } + } + +#if defined(REQUIRES_DUMPMEM) +static inline char cfix(char x) __attribute__ ((unused)) ; +static void dumpmem(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ; +static void dumpframe(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ; + +static inline char cfix(char x) + { + return ( x >= 0x20 && x < 0x80 ) ? x : '.' ; + } +static void dumpmem(const void *address, unsigned int length, const char * label) + { + int x ; + TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"(>)Memory dump length=0x%08x: %s",length,label) ; + for (x=0;x<length;x+=32) + { + int *v __attribute__ ((unused)) = (int *)(address+x) ; + char *c __attribute__ ((unused)) = (char *)(address+x) ; + TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"%p: %08x %08x %08x %08x %08x %08x %08x %08x %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c", + v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7], + cfix(c[0]),cfix(c[1]),cfix(c[2]),cfix(c[3]), + cfix(c[4]),cfix(c[5]),cfix(c[6]),cfix(c[7]), + cfix(c[8]),cfix(c[9]),cfix(c[10]),cfix(c[11]), + cfix(c[12]),cfix(c[13]),cfix(c[14]),cfix(c[15]), + cfix(c[16]),cfix(c[17]),cfix(c[18]),cfix(c[19]), + cfix(c[20]),cfix(c[21]),cfix(c[22]),cfix(c[23]), + cfix(c[24]),cfix(c[25]),cfix(c[26]),cfix(c[27]), + cfix(c[28]),cfix(c[29]),cfix(c[30]),cfix(c[31]) + ) ; + } + TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"(<)Memory dump") ; + } + +static void dumpframe(const void *address, unsigned int length, const char * label) + { + int x ; + unsigned int limlen = (length>1024) ? 
1024 : length ; + TRACEN(k_t_fifocontents,"(>)ethframe dump length=%d: %s",length,label) ; + for (x=0;x<limlen;x+=32) + { + int *v __attribute__ ((unused)) = (int *)(address+x) ; + char *c __attribute__ ((unused)) = (char *)(address+x) ; + TRACEN(k_t_fifocontents,"%p: %08x %08x %08x %08x %08x %08x %08x %08x %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c", + v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7], + cfix(c[0]),cfix(c[1]),cfix(c[2]),cfix(c[3]), + cfix(c[4]),cfix(c[5]),cfix(c[6]),cfix(c[7]), + cfix(c[8]),cfix(c[9]),cfix(c[10]),cfix(c[11]), + cfix(c[12]),cfix(c[13]),cfix(c[14]),cfix(c[15]), + cfix(c[16]),cfix(c[17]),cfix(c[18]),cfix(c[19]), + cfix(c[20]),cfix(c[21]),cfix(c[22]),cfix(c[23]), + cfix(c[24]),cfix(c[25]),cfix(c[26]),cfix(c[27]), + cfix(c[28]),cfix(c[29]),cfix(c[30]),cfix(c[31]) + ) ; + } + TRACEN(k_t_fifocontents,"(<)ethframe dump") ; + } +#else +static inline void dumpmem(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ; +static inline void dumpmem(const void *address, unsigned int length, const char * label) + { + } +static void dumpframe(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ; +static void dumpframe(const void *address, unsigned int length, const char * label) + { + } +#endif + +static void dumpRecFifoGroup(DMA_RecFifoGroup_t * recFifoGroup) __attribute__ ((unused)) ; +static void dumpRecFifoGroup(DMA_RecFifoGroup_t * recFifoGroup) + { + TRACEN(k_t_request,"(>)recFifoGroup=%p",recFifoGroup) ; + if( recFifoGroup != NULL ) + { + TRACEN(k_t_request,"group_id=%d num_normal_fifos=%d num_hdr_fifos=%d mask=%08x status_ptr=%p", + recFifoGroup->group_id,recFifoGroup->num_normal_fifos,recFifoGroup->num_hdr_fifos,recFifoGroup->mask,recFifoGroup->status_ptr + ) ; + TRACEN(k_t_request,"not_empty=%08x%08x available=%08x%08x threshold_crossed=%08x%08x", + recFifoGroup->status_ptr->not_empty[0],recFifoGroup->status_ptr->not_empty[1], + 
recFifoGroup->status_ptr->available[0],recFifoGroup->status_ptr->available[1], + recFifoGroup->status_ptr->threshold_crossed[0],recFifoGroup->status_ptr->threshold_crossed[1] + ) ; + TRACEN(k_t_request,"fifos[0] global_fifo_id=%d type=%d num_packets_processed_since_moving_fifo_head=%d", + recFifoGroup->fifos[0].global_fifo_id, + recFifoGroup->fifos[0].type, + recFifoGroup->fifos[0].num_packets_processed_since_moving_fifo_head + ) ; + TRACEN(k_t_request,"fifos[0] fifo_hw_ptr=%p free_space=%08x fifo_size=%08x pa_start=%08x va_start=%p va_head=%p va_tail=%p va_end=%p %s", + recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr, + recFifoGroup->fifos[0].dma_fifo.free_space, + recFifoGroup->fifos[0].dma_fifo.fifo_size, + recFifoGroup->fifos[0].dma_fifo.pa_start, + recFifoGroup->fifos[0].dma_fifo.va_start, + recFifoGroup->fifos[0].dma_fifo.va_head, + recFifoGroup->fifos[0].dma_fifo.va_tail, + recFifoGroup->fifos[0].dma_fifo.va_end, + (recFifoGroup->fifos[0].dma_fifo.free_space != recFifoGroup->fifos[0].dma_fifo.fifo_size) ? "!!!" : "" + ) ; + if( recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr != NULL ) + { + TRACEN(k_t_request,"hwfifos[0] pa_start=%08x pa_end=%08x pa_head=%08x pa_tail=%08x %s", + recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_start, + recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_end, + recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_head, + recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_tail, + (recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_head != recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_tail) ? "!!!" 
: "" + ) ; + } + } + TRACEN(k_t_request,"(<)") ; + + } + +static void dumpInjFifoGroup(DMA_InjFifoGroup_t * injFifoGroup) __attribute__ ((unused)) ; +static void dumpInjFifoGroup(DMA_InjFifoGroup_t * injFifoGroup) + { + TRACEN(k_t_request,"(>)injFifoGroup=%p",injFifoGroup) ; + if( injFifoGroup != NULL ) + { + DMA_InjFifoStatus_t *injStatus = injFifoGroup->status_ptr ; + int x ; + TRACEN(k_t_request,"status_ptr=%p permissions=0x%08x group_id=%d", + injFifoGroup->status_ptr, injFifoGroup->permissions, injFifoGroup->group_id) ; + if( injStatus) + { + unsigned int available = injStatus->available ; + TRACEN(k_t_request,"status not_empty=0x%08x available=0x%08x threshold_crossed=0x%08x activated=0x%08x", + injStatus->not_empty, available, injStatus->threshold_crossed, injStatus->activated + ) ; + for( x=0; x<DMA_NUM_INJ_FIFOS_PER_GROUP; x+=1) + { + if( (0x80000000 >> x) & available) + { + DMA_InjFifo_t *fifo=injFifoGroup->fifos+x ; + DMA_FifoHW_t *hw_ptr = fifo->dma_fifo.fifo_hw_ptr ; + if( fifo->occupiedSize) + { + TRACEN(k_t_request, " fifos[%d] fifo_id=%d desc_count=0x%08x%08x occupiedSize=0x%08x priority=%d local=%d ts_inj_map=0x%02x %s", + x, fifo->fifo_id, (unsigned int)(fifo->desc_count >> 32),(unsigned int)(fifo->desc_count), fifo->occupiedSize, fifo->priority, fifo->local, fifo->ts_inj_map, + (fifo->occupiedSize) ? "!!!" 
: "" + ) ; + } + if( fifo->dma_fifo.va_head != fifo->dma_fifo.va_tail) + { + TRACEN(k_t_request," fifos[%d] fifo_hw_ptr=%p free_space=%08x fifo_size=%08x pa_start=%08x va_start=%p va_head=%p va_tail=%p va_end=%p", + x, + hw_ptr, + fifo->dma_fifo.free_space, + fifo->dma_fifo.fifo_size, + fifo->dma_fifo.pa_start, + fifo->dma_fifo.va_start, + fifo->dma_fifo.va_head, + fifo->dma_fifo.va_tail, + fifo->dma_fifo.va_end + ) ; + } + if( hw_ptr) + { + if( hw_ptr->pa_head != hw_ptr->pa_tail) + { + TRACEN(k_t_request," hwfifos[%d] pa_start=%08x pa_end=%08x pa_head=%08x pa_tail=%08x %s", + x, + hw_ptr->pa_start, + hw_ptr->pa_end, + hw_ptr->pa_head, + hw_ptr->pa_tail, + (hw_ptr->pa_head != hw_ptr->pa_tail) ? "!!!" : "" + ) ; + } + } + } + } + } + } + TRACEN(k_t_request,"(<)") ; + } + +static void bgp_dma_tcp_set_mtu(dma_tcp_t *dma_tcp, unsigned int mtu) __attribute__ ((unused)) ; +static void bgp_dma_tcp_set_mtu(dma_tcp_t *dma_tcp, unsigned int mtu) + { + unsigned int max_packets_per_frame=(mtu+k_torus_link_payload_size-1) / k_torus_link_payload_size ; + unsigned int max_packets_per_frame2=(mtu+k_injection_packet_size-1) / k_injection_packet_size ; + unsigned int mtu1=max_packets_per_frame * k_torus_link_payload_size + k_torus_skb_alignment ; + unsigned int mtu2=max_packets_per_frame2 * k_injection_packet_size + k_torus_skb_alignment ; + dma_tcp->max_packets_per_frame = max_packets_per_frame ; + dma_tcp->mtu = (mtu1>mtu2) ? mtu1 : mtu2 ; + } + +/* Test if we think a socket is affected by torus congestion */ +unsigned int bgp_torus_congestion(struct sock *sk) ; + + +static inline unsigned int stack_pointer(void) +{ + uint32_t value; + asm volatile ("mr %0,1": "=r" (value) ); + return value; +} + +/* Fragment reassembly control for 'frames' */ +/* + * When the first packet of a frame arrives, examine the eth and ip headers to allocate a skbuff which will have + * enough data for the frame. Arrange to assemble the first fragment into the data area. 
+ * + * When the last packet of a fragment arrives, we know whether the frame is complete. If it is a one-frag frame, + * hand it off. I + */ + +typedef struct +{ + unsigned int frame_size ; /* IP frame size, from IP header */ + unsigned int frag_size ; /* fragment size */ + unsigned int frag_pad_head ; /* Displacement of first byte of first fragment from alignment */ + unsigned int fragment_index ; /* Index of fragment, starts at 0 */ + unsigned int bytes_accounted_for ; /* Number of bytes in accounted for including the current fragment */ + unsigned char * frag_base ; /* Where to pack this frag down to */ + unsigned char * frag_data ; /* First byte free after current fragment is received */ + unsigned char * frag_payload ; /* Aligned address to drop first packet of next fragment into skb */ +} fragment_reassembler; + +static inline fragment_reassembler * frag_re(struct sk_buff *skb) +{ + return (fragment_reassembler *) &(skb->cb) ; +} + +void dma_tcp_show_reception(dma_tcp_t * dma_tcp) ; + +int proc_do_dma_rec_counters(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) ; +extern int bgp_dma_tcp_counter_copies[DMA_NUM_COUNTERS_PER_GROUP] ; +static void show_dma_descriptor(DMA_InjDescriptor_t *d) __attribute((unused)) ; +static void show_dma_descriptor(DMA_InjDescriptor_t *d) +{ + unsigned int * di = (unsigned int *) d ; + TRACEN(k_t_request,"DMA_InjDescriptor_t(0x%08x 0x%08x 0x%08x 0x%08x (0x%08x 0x%08x 0x%08x 0x%08x))", + d->word1, d->word2, d->base_offset, d->msg_length, d->hwHdr.word0, d->hwHdr.word1, d->hwHdr.word2, d->hwHdr.word3) ; + TRACEN(k_t_request,"prefetch_only=%d local_copy=%d",(di[0] >> 1)& 1,di[0] & 1) ; +} + +typedef struct +{ + long long int sxx ; + long long int sxy ; +/* long long int m0 ; */ +/* long long int m1 ; */ +/* long long int det ; */ + int s1 ; + int sx ; + int sy ; +} dma_statistic_t ; +extern dma_statistic_t bgp_dma_rate ; + +enum { + k_injCounterId = 0 // Injection counter 
number to use +} ; + +/* Support for freeing 'a few' skbuffs when outbound DMA is complete each time we go around */ +enum { + k_skb_group_count = 8 +}; +typedef struct { + unsigned int count ; + struct sk_buff * group[k_skb_group_count] ; +} skb_group_t ; +static void skb_group_init(skb_group_t * skb_group) __attribute__((unused)) ; +static void skb_group_init(skb_group_t * skb_group) +{ + skb_group->count = 0 ; +} + + +static void skb_group_add(skb_group_t * skb_group, struct sk_buff * skb) __attribute__((unused)) ; +static void skb_group_add(skb_group_t * skb_group, struct sk_buff * skb) +{ + unsigned int count=skb_group->count ; + if( count < k_skb_group_count ) + { + skb_group->group[count] = skb ; + TRACEN(k_t_general,"Queueing skb_group->group[%d]=%p for free",count,skb) ; + skb_group->count = count+1 ; + } + else + { + TRACEN(k_t_error,"Overrunning queue of skbs to free skb=%p",skb) ; + dev_kfree_skb(skb) ; + } +} +static void skb_group_free(skb_group_t * skb_group) __attribute__((unused)) ; +static void skb_group_free(skb_group_t * skb_group) +{ + unsigned int count=skb_group->count ; + unsigned int index ; + struct sk_buff ** skb_array=skb_group->group ; + BUG_ON(count > k_skb_group_count) ; + if( count > k_skb_group_count) count=k_skb_group_count ; + for(index=0;index<count;index+=1) + { + TRACEN(k_t_general,"freeing skb_array[%d]=%p",index,skb_array[index]) ; + if( skb_array[index]) + { + dev_kfree_skb(skb_array[index]) ; + skb_array[index]=NULL ; + } + } +} + +static void skb_group_queue_seq(skb_group_t * group, struct sk_buff ** skb_array, unsigned int count +#if defined(TRACK_LIFETIME_IN_FIFO) + , unsigned int core, unsigned int desired_fifo, unsigned long long now, unsigned int x +#endif + ) +{ + unsigned int index ; + + for( index=0 ; index<count; index+=1) + { + if( skb_array[index]) + { +#if defined(TRACK_LIFETIME_IN_FIFO) + struct sk_buff *skb=skb_array[index] ; + unsigned long long lifetime_in_fifo = now - *(unsigned long long *) 
skb_array[index]->cb ; + TRACEN(k_t_detail ,"core=%d desired_fifo=%d lifetime=0x%016llx",core, desired_fifo,lifetime_in_fifo) ; + if( skb->len >= 4096 && desired_fifo < k_injecting_directions && lifetime_in_fifo > max_lifetime_by_direction[desired_fifo]) + { + max_lifetime_by_direction[desired_fifo] = lifetime_in_fifo ; + } + if( skb->len >= 4096 && lifetime_in_fifo > 0x7fffffff) + { + struct sock *sk=skb->sk ; + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + unsigned int daddr=inet->daddr ; + unsigned int flags = TCP_SKB_CB(skb)->flags ; + TRACEN(k_t_congestion,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u x=%d in-fifo-time=0x%016llx", + sk, skb, skb->data, skb->len, flags, + daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff, + x+index, + lifetime_in_fifo + ) ; + } +#endif + skb_group_add(group,skb_array[index]) ; + skb_array[index] = NULL ; + } + } +} +static void skb_group_queue(skb_group_t * group, struct sk_buff ** skb_array, unsigned int start, unsigned int count +#if defined(TRACK_LIFETIME_IN_FIFO) + , unsigned int core, unsigned int desired_fifo, unsigned long long now +#endif + ) __attribute__ ((unused)) ; +static void skb_group_queue(skb_group_t * group, struct sk_buff ** skb_array, unsigned int start, unsigned int count +#if defined(TRACK_LIFETIME_IN_FIFO) + , unsigned int core, unsigned int desired_fifo, unsigned long long now +#endif + ) +{ + TRACEN(k_t_detail , "Queuing skbs for freeing start=%d count=%d", start, count) ; + if( start+count <= k_injection_packet_count) + { + skb_group_queue_seq(group,skb_array+start, count +#if defined(TRACK_LIFETIME_IN_FIFO) + , core, desired_fifo, now, 0 +#endif + ) ; + } + else + { + skb_group_queue_seq(group,skb_array+start, k_injection_packet_count-start +#if defined(TRACK_LIFETIME_IN_FIFO) + , core, desired_fifo, now,0 +#endif + ) ; + skb_group_queue_seq(group,skb_array, count - (k_injection_packet_count-start) +#if defined(TRACK_LIFETIME_IN_FIFO) + , 
core, desired_fifo, now,k_injection_packet_count-start +#endif + ) ; + } + +} + +/* We will be using the injection machinery as circular buffers; this is the 'circle' function */ +static inline unsigned int packet_mod(unsigned int index) + { + return index & (k_injection_packet_count-1) ; + } + +/* Try to minimise the 'needless' spins if several cores try to inject contemporaneously -- not anymore, best not to overtake on a path */ +static inline int injection_group_hash(dma_tcp_t *dma_tcp,int x,int y, int z) +{ +/* return 0 ; */ + return ( x/2 + y/2 + z/2 ) & 3 & (dma_tcp->tuning_injection_hashmask); +} + +#if defined(BARRIER_WITH_IOCTL) + +static inline void timing_histogram(dma_tcp_t * dma_tcp) +{ + int current_tbl=get_tbl() ; + int delta_tbl=current_tbl-dma_tcp->prev_tbl ; + dma_tcp->timing_histogram_buckets[fls(delta_tbl)] += 1 ; + dma_tcp->prev_tbl = current_tbl ; + +} +#endif + + +static inline int wrapped_DMA_InjFifoInjectDescriptorById( + DMA_InjFifoGroup_t *fg_ptr, + int fifo_id, + DMA_InjDescriptor_t *desc + ) +{ + int rc ; + rc = DMA_InjFifoInjectDescriptorById(fg_ptr,fifo_id,desc) ; + return rc ; +} + + + +/* #define AUDIT_HEADLEN */ +/* #define TRACK_LIFETIME_IN_FIFO */ + +typedef struct +{ + DMA_InjDescriptor_t desc ; +#if defined(TRACK_LIFETIME_IN_FIFO) + unsigned long long injection_timestamp ; +#endif +#if defined(AUDIT_HEADLEN) + unsigned short tot_len ; +#endif + char free_when_done ; +} frame_injection_cb ; +extern unsigned int tot_len_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ; // TODO: fix the name if we leave it extern ... + +#if defined(AUDIT_FRAME_HEADER) +typedef struct { + struct ethhdr eth ; + struct iphdr iph ; +} frame_header_t ; +extern frame_header_t all_headers_in_counters[DMA_NUM_COUNTERS_PER_GROUP] ; // TODO: fix the name if we leave it extern ... 
+#endif + +static void dma_tcp_show_reception_one(dma_tcp_t * dma_tcp, unsigned int x, unsigned int counter_value) __attribute__((unused)) ; +static void dma_tcp_show_reception_one(dma_tcp_t * dma_tcp, unsigned int x, unsigned int counter_value) +{ + struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; + if( skb) + { + struct ethhdr *eth = (struct ethhdr *)(skb->data) ; + unsigned int eth_proto = eth->h_proto ; + + struct iphdr *iph = (struct iphdr *) (eth+1) ; + unsigned int tot_len=iph->tot_len ; + unsigned int saddr=iph->saddr ; + if( tot_len != tot_len_for_rcv[x]) + { + TRACEN(k_t_error,"(!!!) tot_len trampled") ; + } + + TRACEN(k_t_request,"(---) skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d slot=0x%08x conn_id=0x%02x tot_len_for_rcv=0x%04x counter_value=0x%04x", + skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff, dma_tcp->slot_for_rcv[x], dma_tcp->conn_for_rcv[x], tot_len_for_rcv[x],counter_value + ) ; + dumpmem(skb->data,0x42,"eth-ip-tcp header") ; + show_dma_descriptor((DMA_InjDescriptor_t *)&skb->cb) ; +#if defined(AUDIT_FRAME_HEADER) + if(memcmp(skb->data,((char *)(all_headers_in_counters+x)),32)) + { + TRACEN(k_t_request,"(!!!) 
header not as first seen") ; + dumpmem(skb->data-14,sizeof(frame_header_t),"header-now") ; + dumpmem(all_headers_in_counters+x,sizeof(frame_header_t),"header-in-propose") ; + + } +#endif + } + else + { + TRACEN(k_t_error|k_t_request,"(E) x=%d Counter in use but no skb !",x) ; + } + +} + +void __init +dma_tcp_diagnose_init(dma_tcp_t *dma_tcp) ; + +void __init +bgp_dma_memcpyInit(dma_tcp_t *dma_tcp) ; + +void __init +dma_tcp_devfs_procfs_init(dma_tcp_t *dma_tcp) ; + +#if defined(TRACK_LIFETIME_IN_FIFO) +extern unsigned long long max_lifetime_by_direction[k_injecting_directions] ; +#endif + +#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS) +extern int tcp_scattergather_frag_limit ; +#endif + +typedef struct { unsigned char c[240] ; } torus_frame_payload ; + +#endif diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c b/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c new file mode 100644 index 00000000000000..931ae5365f1502 --- /dev/null +++ b/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c @@ -0,0 +1,707 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: Blue Gene low-level driver for sockets over torus + * + * + * Intent: Carry torus packets as messages into memory FIFOs, and interpret them + * as eth frames for TCP + * Later on, add token-based flow control with a view to preventing + * congestion collapse as the machine gets larger and the loading gets higher + * + ********************************************************************/ +#define REQUIRES_DUMPMEM + +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/syscalls.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/ip.h> + + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/time.h> +#include <asm/bitops.h> +#include <linux/vmalloc.h> + +#include <linux/dma-mapping.h> + +#include <net/inet_connection_sock.h> +#include <net/inet_sock.h> +#include <net/inet_hashtables.h> +#include <net/tcp.h> + + + +/* #include "bglink.h" */ +#include <spi/linux_kernel_spi.h> + +#include <asm/time.h> + +/* #define CONFIG_BLUEGENE_TORUS_TRACE */ +/* #define CRC_CHECK_FRAMES */ +#define VERIFY_TARGET +/* #define SIDEBAND_TIMESTAMP */ +#include "bgp_dma_tcp.h" + + + + +/* void bgp_dma_diag_reissue_rec_counters(dma_tcp_t *dma_tcp) */ +/* { */ +/* unsigned int x; */ +/* for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1) */ +/* { */ +/* struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; */ +/* if( skb) */ +/* { */ +/* frame_injection_cb * ficb = 
(frame_injection_cb *) skb->cb ; */ +/* TRACEN(k_t_general,"Redriving x=%d skb=%p",x,skb) ; */ +/* inject_dma_descriptor_propose_accept(dma_tcp,0,&ficb->desc) ; */ +/* } */ +/* } */ +/* } */ + +static inline void show_tx_skbs(tx_t *tx, unsigned int node_count) +{ + unsigned int slot_index ; + unsigned int conn_id ; + unsigned int tx_skb_count = 0 ; + for(slot_index=0;slot_index<node_count;slot_index += 1) + { + for( conn_id=0;conn_id < k_connids_per_node;conn_id += 1) + { + struct sk_buff * skb=get_tx_skb(tx,slot_index,conn_id) ; + if(skb) + { + struct ethhdr *eth = (struct ethhdr *)(skb->data) ; + struct iphdr *iph = (struct iphdr *) (eth+1) ; + unsigned int tot_len=iph->tot_len ; + unsigned int daddr=iph->daddr ; + tx_skb_count += 1 ; + + TRACEN(k_t_request,"(---) slot_index=0x%08x conn_id=0x%02x skb=%p tot_len=0x%04x daddr=%d.%d.%d.%d", + slot_index,conn_id,skb,tot_len,daddr>>24, (daddr >> 16) & 0xff,(daddr >> 8) & 0xff, daddr & 0xff) ; + } + } + } + TRACEN(k_t_request,"tx_skb_count=%d",tx_skb_count) ; +} + +void dma_tcp_show_reception(dma_tcp_t * dma_tcp) +{ + int x ; + int slot ; + unsigned int inUseCount = 0 ; + TRACEN(k_t_request,"rec hitZero 0x%08x 0x%08x",DMA_CounterGetHitZero(&dma_tcp->recCounterGroup,0),DMA_CounterGetHitZero(&dma_tcp->recCounterGroup,1)) ; + for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1) + { + bgp_dma_tcp_counter_copies[x] = DMA_CounterGetValueNoMsync(dma_tcp->recCounterGroup.counter+x) ; + if( bgp_dma_tcp_counter_copies[x] != 0 || dma_tcp->recCntrInUse[x] != 0) + { + inUseCount += 1 ; + TRACEN(k_t_request,"rec_counter[0x%02x] value=0x%08x inUse=%d", x,bgp_dma_tcp_counter_copies[x],dma_tcp->recCntrInUse[x]) ; + if(dma_tcp->recCntrInUse[x]) + { + dma_tcp_show_reception_one(dma_tcp,x,bgp_dma_tcp_counter_copies[x]) ; +/* struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; */ +/* if( skb) */ +/* { */ +/* struct ethhdr *eth = (struct ethhdr *)(skb->data) ; */ +/* unsigned int eth_proto = eth->h_proto ; */ +/* */ +/* struct iphdr *iph = (struct iphdr *) 
(eth+1) ; */ +/* unsigned int tot_len=iph->tot_len ; */ +/* unsigned int saddr=iph->saddr ; */ +/* if( tot_len != tot_len_for_rcv[x]) */ +/* { */ +/* TRACEN(k_t_error,"(!!!) tot_len trampled") ; */ +/* } */ +/* */ +/* TRACEN(k_t_request,"(---) skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d slot=0x%08x conn_id=0x%02x tot_len_for_rcv=0x%04x", */ +/* skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff, dma_tcp->slot_for_rcv[x], dma_tcp->conn_for_rcv[x], tot_len_for_rcv[x] */ +/* ) ; */ +/* dumpmem(skb->data,0x42,"eth-ip-tcp header") ; */ +/* show_dma_descriptor((DMA_InjDescriptor_t *)&skb->cb) ; */ +/* #if defined(AUDIT_FRAME_HEADER) */ +/* if(memcmp(skb->data,((char *)(all_headers_in_counters+x)),32)) */ +/* { */ +/* TRACEN(k_t_request,"(!!!) header not as first seen") ; */ +/* dumpmem(skb->data-14,sizeof(frame_header_t),"header-now") ; */ +/* dumpmem(all_headers_in_counters+x,sizeof(frame_header_t),"header-in-propose") ; */ +/* */ +/* } */ +/* #endif */ +/* } */ +/* else */ +/* { */ +/* TRACEN(k_t_error|k_t_request,"(E) x=%d Counter in use but no skb !",x) ; */ +/* } */ + } + } + } + TRACEN(k_t_request,"inUseCount=%d",inUseCount) ; + show_tx_skbs(&dma_tcp->tx_mux,dma_tcp->node_count) ; + TRACEN(k_t_request,"skb_queue_len(pending_rcv_skbs)=%d",skb_queue_len(&dma_tcp->balancer.b[0].pending_rcv_skbs)) ; + { + struct sk_buff *skb = skb_peek(&dma_tcp->balancer.b[0].pending_rcv_skbs) ; + if(skb) + { + + struct ethhdr *eth = (struct ethhdr *)(skb->data) ; + unsigned int eth_proto = eth->h_proto ; + + struct iphdr *iph = (struct iphdr *) (eth+1) ; + unsigned int tot_len=iph->tot_len ; + unsigned int saddr=iph->saddr ; + TRACEN(k_t_request,"skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d",skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff ) ; + } + + } + for( slot=0;slot<dma_tcp->node_count; slot+=1) + { + unsigned int proposals_active=get_proposals_active(&dma_tcp->rcvdemux,slot) ; + 
unsigned int count_pending_f=count_pending_flow(&dma_tcp->rcvdemux,slot) ; + unsigned int located_counters=0 ; + if( proposals_active || count_pending_f ) + { + TRACEN(k_t_request,"slot=0x%08x proposals_active=%d count_pending_flow=%d",slot,proposals_active,count_pending_f) ; + } + for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1) + { + struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; + if ( skb && slot == dma_tcp->slot_for_rcv[x] ) + { + located_counters += 1 ; + } + } + if( located_counters + count_pending_f != proposals_active || ( 0 == located_counters && count_pending_f > 0 )) + { + TRACEN(k_t_request|k_t_error,"(E) slot=0x%08x located_counters=%d count_pending_f=%d proposals_active=%d", + slot,located_counters,count_pending_f,proposals_active) ; + } + + } +} + +int proc_do_dma_rec_counters(struct ctl_table *ctl, int write, struct file * filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int rc ; + dma_tcp_show_reception(&dma_tcp_state ) ; + TRACEN(k_t_entryexit,"(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ; + rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ; + TRACEN(k_t_entryexit,"(<)") ; + return rc ; + +} + +/* Routine to report how full the outgoing FIFOs are */ +void bgp_dma_diag_report_transmission_queue(int __user * report) + { + dma_tcp_t *dma_tcp = &dma_tcp_state ; + unsigned int core ; + TRACEN(k_t_general,"report=%p",report) ; + for( core=0 ; core<k_injecting_cores; core += 1) + { + unsigned int desired_fifo ; + for(desired_fifo=0; desired_fifo<k_injecting_directions; desired_fifo += 1 ) + { + unsigned int fifo_initial_head = dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].fifo_initial_head ; + unsigned int fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ; + unsigned int fifo_current_tail = + (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, 
dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ; + unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ; + unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ; + unsigned int current_injection_used=packet_mod(tailx-headx) ; + put_user(current_injection_used, report) ; + report += 1 ; + TRACEN(k_t_detail,"core=%d desired_fifo=%d current_injection_used=%d",core,desired_fifo,current_injection_used) ; + + } + + + } + put_user(dma_tcp->qtyFreeRecCounters, report) ; + report += 1 ; + put_user(flow_count(dma_tcp,k_send_propose_rpc)-flow_count(dma_tcp,k_act_accept_rpc), report) ; + report += 1 ; + put_user(flow_count(dma_tcp,k_act_propose_rpc)-flow_count(dma_tcp,k_send_accept_rpc), report) ; + } +static int issueDiagnose( + DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + dma_tcp_t * dma_tcp, + void * request , + int payload_bytes, + unsigned int src_key, + int Put_Offset + ) + { + unsigned int *payload=(unsigned int *)request ; + TRACEN(k_t_request,"src_key=0x%08x Put_Offset=0x%08x payload_bytes=0x%02x [%08x %08x %08x %08x]", + src_key,Put_Offset, payload_bytes,payload[0],payload[1],payload[2],payload[3]) ; + return 0 ; + } + +static int issueDiagnoseActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) + { + unsigned int SW_Arg=packet_ptr->SW_Arg ; + int Put_Offset=packet_ptr->Put_Offset ; + enable_kernel_fp() ; // TODO: don't think this is needed nowadays + + TRACEN(k_t_detail,"recv_func_parm=%p payload_ptr=%p SW_Arg=0x%08x payload_bytes=0x%08x Put_Offset=0x%08x", + recv_func_parm,payload_ptr,SW_Arg,payload_bytes,Put_Offset) ; + return issueDiagnose( + f_ptr, + packet_ptr, + (dma_tcp_t *) recv_func_parm, + (void *) payload_ptr, + payload_bytes, + SW_Arg, + Put_Offset + ) ; + } +static inline int inject_into_dma_diag_sync(dma_tcp_t *dma_tcp, void * address, unsigned int length, unsigned int x, unsigned int y, unsigned int z, unsigned int 
my_injection_group, unsigned int desired_fifo, unsigned int SW_Arg , + unsigned int proto_start ) + { + dma_addr_t dataAddr ; + DMA_InjDescriptor_t desc; + int ret1, ret2 __attribute__((unused)); + unsigned int firstpacketlength = length ; + TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo); + dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE); + +/* First injection is 'start of frame/fragment' */ + ret1 = DMA_TorusMemFifoDescriptor( &desc, + x, y, z, + k_ReceptionFifoGroup, /* recv fifo grp id */ + 0, /* hints */ + k_VC_anyway, /* vc - adaptive */ + SW_Arg, /* softw arg */ + proto_start, /* function id */ + k_InjectionCounterGroup, /* inj cntr group id */ + k_injCounterId, /* inj counter id */ + dataAddr, /* send address */ + firstpacketlength /* msg len */ + ); + +#if defined(SIDEBAND_TIMESTAMP) + { + unsigned long now_lo=get_tbl() ; + DMA_DescriptorSetPutOffset(&desc,((-length) & 0x0000ffff ) | (now_lo & 0xffff0000)) ; + + } +#else + DMA_DescriptorSetPutOffset(&desc,-length) ; /* For 'memory FIFO packets', the put offset has no hardware use. 
Set it to indicate the message (fragment) length */ +#endif + ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo], + &desc ); + TRACEN(k_t_general , "(<)proto_start=%d firstpacketlength=%d ret1=%d ret2=%d",proto_start,firstpacketlength,ret1, ret2); + return 1 ; + } + +static void bgp_dma_diag_drive_sync_at(dma_tcp_t *dma_tcp, int x,int y,int z, int sendBytes) +{ + unsigned int desired_fifo= select_transmission_fifo(dma_tcp,x,y,z) ; + unsigned long flags ; + unsigned int current_injection_used=0xffffffff ; + unsigned int aligned_payload_address = (unsigned int)dma_tcp->diag_block_buffer ; + unsigned int aligned_payload_length = sendBytes ; + unsigned int pad_head = 0 ; + + int ret = 0; + int ring_ok ; + + int my_injection_group ; + skb_group_t skb_group ; + TRACEN(k_t_general ,"(>) at (%02x,%02x,%02x)", x,y,z); + skb_group_init(&skb_group) ; + + my_injection_group=injection_group_hash(dma_tcp,x,y,z) ; + spin_lock_irqsave(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ; + { + unsigned int src_key = (dma_tcp->src_key << 6) | (my_injection_group << 4) | pad_head ; + idma_direction_t * buffer = dma_tcp->idma.idma_core[my_injection_group].idma_direction+desired_fifo ; + /* Set up the payload */ + unsigned int bhx = buffer->buffer_head_index ; + unsigned int lastx = packet_mod(bhx) ; + unsigned int fifo_initial_head = dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].fifo_initial_head ; + unsigned int fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ; + unsigned int fifo_current_tail = + (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ; + unsigned int headx = 
(fifo_current_head-fifo_initial_head) >> 5 ; + unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ; + unsigned int injection_count ; +#if defined(TRACK_LIFETIME_IN_FIFO) + unsigned long long now=get_powerpc_tb() ; + *(unsigned long long*)(skb->cb) = now ; +#endif + current_injection_used=packet_mod(tailx-headx) ; + /* If the network is backing up, we may have to skip out here, */ + /* so that we don't overwrite unsent data. */ + TRACEN(k_t_general ,"Runway desired_fifo=%d headx=%d tailx=%d bhx=%d current_injection_used=%d", + desired_fifo,headx,tailx,bhx,current_injection_used) ; + if( current_injection_used > buffer->injection_high_watermark ) + { + buffer->injection_high_watermark=current_injection_used ; /* Congestion statistic */ + } + { + /* Need to have room to inject the in-skbuff data plus all attached 'fragments', each of which may be sent in 3 injections */ + if( current_injection_used+3*(MAX_SKB_FRAGS+1) < k_injection_packet_count-1) + { + ring_ok = 1 ; + TRACEN(k_t_general,"Runway slot granted") ; + } + else + { + ring_ok = 0 ; + TRACEN(k_t_congestion,"Runway slot denied tailx=%08x headx=%08x",tailx,headx) ; + } + } + TRACEN(k_t_general ,"Injection my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x", + my_injection_group, desired_fifo, bhx, headx,tailx + ) ; + if ( ring_ok ) + { + /* We are going to send something. Display its protocol headers .. */ + + /* Bump the injection counter. 
Actually only needs doing once per 4GB or so */ + ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId, 0xffffffff ); + + /* and inject it */ + { + + injection_count = inject_into_dma_diag_sync(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,my_injection_group,desired_fifo, + src_key, + dma_tcp->proto_issue_diag_sync + ) ; + + + + } + { + unsigned int nhx=packet_mod(bhx+injection_count) ; + /* Record the skbuff so it can be freed later, after data is DMA'd out */ + dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array[nhx] = NULL ; + /* Remember where we will be pushing the next injection in */ + buffer->buffer_head_index = nhx ; + } + /* hang on to the skbs until they are sent ... */ + if( current_injection_used != 0xffffffff) + { + unsigned int btx = buffer->buffer_tail_index ; /* This indexes the oldest skbuff that might still be pending send by the DMA unit */ + int skql2 = packet_mod(bhx-btx) ; + int count_needing_freeing = skql2-current_injection_used ; + int count_to_free = ( count_needing_freeing > k_skb_group_count) ? 
k_skb_group_count : count_needing_freeing ; + TRACEN(k_t_detail ,"current_injection_used=%d btx=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,btx,skql2,count_needing_freeing,count_to_free); + skb_group_queue(&skb_group,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free +#if defined(TRACK_LIFETIME_IN_FIFO) + , my_injection_group, desired_fifo, now +#endif + ) ; + btx = packet_mod(btx+count_to_free) ; + buffer->buffer_tail_index = btx ; + TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index); + } + } + else + { + TRACEN(k_t_congestion,"Would overrun my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x lastx=%08x", + my_injection_group, desired_fifo, bhx, headx,tailx, lastx + ) ; + } + } + spin_unlock_irqrestore(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ; + skb_group_free(&skb_group) ; + if( k_async_free ) mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ; + + TRACE("(<) desired_fifo=%d",desired_fifo); + +} +static void init_shuffle_vector(unsigned int * shuffle_vector, unsigned int xe, unsigned int ye, unsigned int ze) +{ + unsigned int x; + unsigned int y; + unsigned int z; + for( x=0; x<xe; x+=1) + { + for(y=0;y<ye;y+=1) + { + for( z=0;z<ze;z+=1) + { + *shuffle_vector = (x<<16)|(y<<8)|z ; + shuffle_vector += 1 ; + } + } + } +} + +static inline int next_prbs(int seed) +{ + int ncmask = seed >> 31 ; /* 0x00000000 or 0xffffffff */ + return (seed << 1) ^ (0x04C11DB7 & ncmask) ; /* CRC-32-IEEE 802.3 from http://en.wikipedia.org/wiki/Cyclic_redundancy_check */ +} + +static int scatter_prbs(int seed) +{ + int a ; + for(a=0;a<32;a+=1) + { + seed=next_prbs(seed) ; + } + return seed ; +} +static int shuffle_shuffle_vector(unsigned int * shuffle_vector, unsigned int xe, unsigned int ye, unsigned int ze, int seed) +{ + unsigned int vsize = xe*ye*ze ; + unsigned int 
vmask = vsize-1 ; + unsigned int a ; + + for( a=0; a<vsize;a+=1) + { + unsigned int b = (seed & vmask) ; + unsigned int va = shuffle_vector[a] ; + unsigned int vb = shuffle_vector[b] ; + shuffle_vector[a] = vb ; + shuffle_vector[b] = va ; + seed=next_prbs(seed) ; + + } + return seed ; +} +#if 0 +void dma_tcp_transfer_activate(int sendBytes) +{ + dma_tcp_t *dma_tcp = &dma_tcp_state ; + int a ; + int my_x=dma_tcp->location.coordinate[0] ; + int my_y=dma_tcp->location.coordinate[1] ; + int my_z=dma_tcp->location.coordinate[2] ; + int ext_x=dma_tcp->extent.coordinate[0] ; + int ext_y=dma_tcp->extent.coordinate[1] ; + int ext_z=dma_tcp->extent.coordinate[2] ; + int vsize=ext_x*ext_y*ext_z ; + /* Push the 'diagnostic block' through the DMA unit */ + TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ; + dma_tcp->shuffle_seed = shuffle_shuffle_vector(dma_tcp->shuffle_vector,ext_x,ext_y,ext_z,dma_tcp->shuffle_seed) ; + for(a=0;a<vsize;a+=1) + { + unsigned int tg=dma_tcp->shuffle_vector[a] ; + unsigned int tg_x=tg>>16 ; + unsigned int tg_y=(tg>>8) & 0xff ; + unsigned int tg_z=tg & 0xff ; + TRACEN(k_t_detail,"shuffle_vector[%d]=0x%08x",a,dma_tcp->shuffle_vector[a]) ; + if( my_x != tg_x || my_y != tg_y || my_z != tg_z ) + { + bgp_dma_diag_drive_block_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ; + } + } +} + +void dma_tcp_transfer_activate_to_one(int sendBytes, unsigned int tg) +{ + dma_tcp_t *dma_tcp = &dma_tcp_state ; + int my_x=dma_tcp->location.coordinate[0] ; + int my_y=dma_tcp->location.coordinate[1] ; + int my_z=dma_tcp->location.coordinate[2] ; + /* Push the 'diagnostic block' through the DMA unit */ + TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x tg=0x%08x",sendBytes,tg) ; + { + unsigned int tg_x=tg>>16 ; + unsigned int tg_y=(tg>>8) & 0xff ; + unsigned int tg_z=tg & 0xff ; + if( my_x != tg_x || my_y != tg_y || my_z != tg_z ) + { + bgp_dma_diag_drive_block_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ; + } + } +} +#endif +void 
dma_tcp_transfer_activate_sync(int sendBytes) +{ + dma_tcp_t *dma_tcp = &dma_tcp_state ; + int a ; + int my_x=dma_tcp->location.coordinate[0] ; + int my_y=dma_tcp->location.coordinate[1] ; + int my_z=dma_tcp->location.coordinate[2] ; + int ext_x=dma_tcp->extent.coordinate[0] ; + int ext_y=dma_tcp->extent.coordinate[1] ; + int ext_z=dma_tcp->extent.coordinate[2] ; + int vsize=ext_x*ext_y*ext_z ; + /* Push the 'diagnostic block' through the DMA unit */ + TRACEN(k_t_general,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ; + dma_tcp->shuffle_seed = shuffle_shuffle_vector(dma_tcp->shuffle_vector,ext_x,ext_y,ext_z,dma_tcp->shuffle_seed) ; + for(a=0;a<vsize;a+=1) + { + unsigned int tg=dma_tcp->shuffle_vector[a] ; + unsigned int tg_x=tg>>16 ; + unsigned int tg_y=(tg>>8) & 0xff ; + unsigned int tg_z=tg & 0xff ; + TRACEN(k_t_detail,"shuffle_vector[%d]=0x%08x",a,dma_tcp->shuffle_vector[a]) ; + if( my_x != tg_x || my_y != tg_y || my_z != tg_z ) + { + bgp_dma_diag_drive_sync_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ; + } + } +} + +/* 'across faces' transfer in x,y,z directions, as a 'towards peak performance' test */ +#if 0 +void dma_tcp_transfer_activate_minicube(int sendBytes) +{ + dma_tcp_t *dma_tcp = &dma_tcp_state ; + int my_x=dma_tcp->location.coordinate[0] ; + int my_y=dma_tcp->location.coordinate[1] ; + int my_z=dma_tcp->location.coordinate[2] ; + /* Push the 'diagnostic block' through the DMA unit */ + TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ; + bgp_dma_diag_drive_block_at(dma_tcp,my_x^1,my_y,my_z,sendBytes) ; + bgp_dma_diag_drive_block_at(dma_tcp,my_x,my_y^1,my_z,sendBytes) ; + bgp_dma_diag_drive_block_at(dma_tcp,my_x,my_y,my_z^1,sendBytes) ; +} + +int dma_tcp_transfer_wait(int demandCount) +{ + int spincount = 0 ; + TRACEN(k_t_request,"(>) demandCount=%d",demandCount) ; + while(DiagEndCount < demandCount && spincount < 100 ) + { + int rc ; + set_current_state(TASK_INTERRUPTIBLE); + rc=schedule_timeout(1) ; + if( 0 != rc) 
break ; + spincount += 1 ; + } + TRACEN(k_t_request,"(<) DiagEndCount=%d spincount=%d",DiagEndCount,spincount) ; + return DiagEndCount >= demandCount ; +} +#endif +#if defined(BARRIER_WITH_IOCTL) +volatile static int DiagSyncCount ; + +static int issueInlineFrameDiagSync( + DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + dma_tcp_t * dma_tcp, + void * request , + int payload_bytes, + unsigned int src_key, + int Put_Offset + ) + { + timing_histogram(dma_tcp) ; + DiagSyncCount += 1 ; + return 0 ; + } + +static int issueInlineFrameDiagSyncActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) + { + unsigned int SW_Arg=packet_ptr->SW_Arg ; + int Put_Offset=packet_ptr->Put_Offset ; + + enable_kernel_fp() ; // TODO: don't think this is needed nowadays + TRACEN(k_t_detail,"recv_func_parm=%p payload_ptr=%p SW_Arg=0x%08x payload_bytes=0x%08x Put_Offset=0x%08x", + recv_func_parm,payload_ptr,SW_Arg,payload_bytes,Put_Offset) ; + return issueInlineFrameDiagSync( + f_ptr, + packet_ptr, + (dma_tcp_t *) recv_func_parm, + (void *) payload_ptr, + payload_bytes, + SW_Arg, + Put_Offset + ) ; + } + +#endif + +int dma_tcp_transfer_wait_sync(int demandCount) +{ + int spincount = 0 ; + TRACEN(k_t_general,"(>) demandCount=%d",demandCount) ; + while(DiagSyncCount < demandCount && spincount < 100 ) + { + int rc ; + set_current_state(TASK_INTERRUPTIBLE); + rc=schedule_timeout(1) ; + if( 0 != rc) break ; + spincount += 1 ; + } + TRACEN(k_t_general,"(<) DiagSyncCount=%d spincount=%d",DiagSyncCount,spincount) ; + return DiagSyncCount >= demandCount ; +} + +void dma_tcp_transfer_clearcount(void) +{ + TRACEN(k_t_general,"count cleared") ; +/* DiagEndCount = 0 ; */ + DiagSyncCount = 0 ; +} + +void __init +dma_tcp_diagnose_init(dma_tcp_t *dma_tcp) + { +#if defined(BARRIER_WITH_IOCTL) + dma_tcp->diag_block_buffer=allocate_diag_block_buffer() ; + 
dma_tcp->shuffle_vector=allocate_shuffle_vector(dma_tcp->extent.coordinate[0],dma_tcp->extent.coordinate[1],dma_tcp->extent.coordinate[2]) ; + dma_tcp->shuffle_seed = scatter_prbs(dma_tcp->SW_Arg + 1) ; + init_shuffle_vector(dma_tcp->shuffle_vector,dma_tcp->extent.coordinate[0],dma_tcp->extent.coordinate[1],dma_tcp->extent.coordinate[2]) ; + dma_tcp->proto_issue_diag_sync=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDiagSyncActor, dma_tcp, 0, 0); + memset(dma_tcp->timing_histogram_buckets,0,33*sizeof(int)) ; +#endif + dma_tcp->proto_diagnose=DMA_RecFifoRegisterRecvFunction(issueDiagnoseActor, dma_tcp, 0, 0); + + } diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_frames.c b/drivers/net/bgp_torus/bgp_dma_tcp_frames.c new file mode 100644 index 00000000000000..3e9fd7715756db --- /dev/null +++ b/drivers/net/bgp_torus/bgp_dma_tcp_frames.c @@ -0,0 +1,2712 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: Blue Gene low-level driver for sockets over torus + * + * + * Intent: Carry torus packets as messages into memory FIFOs, and interpret them + * as eth frames for TCP + * Later on, add token-based flow control with a view to preventing + * congestion collapse as the machine gets larger and the loading gets higher + * + ********************************************************************/ +#define REQUIRES_DUMPMEM + +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/syscalls.h> +#include <linux/skbuff.h> +#include <linux/etherdevice.h> +#include <linux/if_ether.h> +#include <linux/ip.h> + + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/time.h> +#include <asm/bitops.h> +#include <linux/vmalloc.h> + +#include <linux/dma-mapping.h> + +#include <net/inet_connection_sock.h> +#include <net/inet_sock.h> +#include <net/inet_hashtables.h> +#include <net/tcp.h> + + + +/* #include "bglink.h" */ +#include <spi/linux_kernel_spi.h> + +#include <asm/time.h> + +/* #define CONFIG_BLUEGENE_TORUS_TRACE */ +/* #define CRC_CHECK_FRAMES */ +#define VERIFY_TARGET +/* #define SIDEBAND_TIMESTAMP */ + +#include "bgp_dma_tcp.h" +#include "bgp_bic_diagnosis.h" + + +static inline void frames_receive_torus(dma_tcp_t *dma_tcp,struct sk_buff * skb) +{ +#if defined(CONFIG_BGP_STATISTICS) + struct ethhdr *eth = (struct ethhdr *) (skb->data) ; + struct iphdr *iph=(struct iphdr *) (eth+1) ; + dma_tcp->bytes_received += 
iph->tot_len ; +#endif + bgtornet_receive_torus(skb); +} + +#if defined(TRACK_LIFETIME_IN_FIFO) +unsigned long long max_lifetime_by_direction[k_injecting_directions] ; +#endif + +static void diag_skb_structure(struct sk_buff *skb) +{ + int f=skb_shinfo(skb)->nr_frags ; + if(0 == f) + { + TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=0 [0x%04x]",skb->len, skb->data_len, skb_headlen(skb)) ; + } + else if(1 == f) + { + TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=1 [0x%04x 0x%04x]",skb->len, skb->data_len, skb_headlen(skb), + skb_shinfo(skb)->frags[0].size + ) ; + } + else if(2 == f) + { + TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=2 [0x%04x 0x%04x 0x%04x]",skb->len, skb->data_len, skb_headlen(skb), + skb_shinfo(skb)->frags[0].size, + skb_shinfo(skb)->frags[1].size + ) ; + } + else + { + TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=%d [0x%04x 0x%04x 0x%04x 0x%04x ..]",skb_shinfo(skb)->nr_frags, + skb->len, skb->data_len, skb_headlen(skb), + skb_shinfo(skb)->frags[0].size, + skb_shinfo(skb)->frags[1].size, + skb_shinfo(skb)->frags[2].size + ) ; + } + if( TRACING(k_t_sgdiag_detail)) + { + unsigned int dump_length = ( skb_headlen(skb) < 256 ) ? skb_headlen(skb) : 256 ; + dumpmem(skb->data, dump_length, "skb_head") ; + } +} + +static inline int torus_frame_payload_memcpy( + torus_frame_payload * target, + torus_frame_payload * source + ) +{ + *target = *source ; + return 0 ; +} + +/* This is as per the powerpc <asm/time.h> 'get_tb' */ +/* Dup'd here because we have to compile with ppc also, which doesn't have it defined */ +static inline u64 get_powerpc_tb(void) +{ + unsigned int tbhi, tblo, tbhi2; + + tbhi = get_tbu(); + tblo = get_tbl(); + tbhi2 = get_tbu(); + /* tbhi2 might be different from tbhi, but that would indicate that there had been a 32-bit carry. 
+ * In that case (tbhi2,0) would be a reasonable representation of the timestamp that we usually + * think of as being (tbhi,tblo) + */ + if( tbhi == tbhi2) + { + return ((u64)tbhi << 32) | tblo; + } + return ((u64)tbhi2 << 32) ; +} +static void display_skb_structure(struct sk_buff *skb) ; + +static torus_frame_payload dummy_payload __attribute__((aligned(16))); +static inline void demux_vacate_slot(dma_tcp_t * dma_tcp, unsigned int slot) + { + set_rcv_payload(&dma_tcp->rcvdemux, slot, (char *)&dummy_payload); + set_rcv_payload_alert(&dma_tcp->rcvdemux, slot, (char *)&dummy_payload); + set_rcv_expect(&dma_tcp->rcvdemux, slot, 0xffffffff); + set_rcv_skb(&dma_tcp->rcvdemux, slot, NULL); + TRACEN(k_t_general,"Slot %d vacated", slot ); + } + +static inline void demux_show_slot(dma_tcp_t * dma_tcp, unsigned int slot) + { + void *payload = get_rcv_payload(&dma_tcp->rcvdemux, slot); + void *alert = get_rcv_payload_alert(&dma_tcp->rcvdemux, slot); + unsigned int expect=get_rcv_expect(&dma_tcp->rcvdemux, slot); + struct sk_buff *skb=get_rcv_skb(&dma_tcp->rcvdemux, slot); + if( payload != &dummy_payload || expect != 0xffffffff || skb ) + { + TRACEN(k_t_error,"(E) not-vacant slot=%08x (%d %d) payload=%p alert=%p expect=0x%08x skb=%p", + slot, slot>>2, slot&3, payload, alert, expect, skb + ) ; + } + } + +static void init_demux_table(dma_tcp_t * dma_tcp, unsigned int node_count ) ; + +static void init_demux_table(dma_tcp_t * dma_tcp, unsigned int node_count ) + { + unsigned int x ; + for( x = 0 ; x < k_slots_per_node*node_count ; x += 1) + { + demux_vacate_slot(dma_tcp,x) ; +#if defined(ENABLE_LATENCY_TRACKING) + rcv_statistic_clear(&(dma_tcp->rcvdemux.rcv_per_slot_vector[x].latency)); +/* set_min_latency(&dma_tcp->rcvdemux, x, 0x7fffffff) ; */ +/* set_max_latency(&dma_tcp->rcvdemux, x, 0x80000000) ; */ +#endif + } + } + + +static void show_protocol_header_tx(char * frame) __attribute__ ((unused)) ; +static void show_protocol_header_tx(char * frame) + { + int * f = (int *) 
frame ; + TRACEN(k_t_request,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", + f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16] + ); + } + +static void show_protocol_header_fault(char * frame) __attribute__ ((unused)) ; +static void show_protocol_header_fault(char * frame) + { + int * f = (int *) frame ; + TRACEN(k_t_error,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", + f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16] + ); + } + +static void show_protocol_header_rx(char * frame) __attribute__ ((unused)) ; +static void show_protocol_header_rx(char * frame) + { + int * f = (int *) frame ; + TRACEN(k_t_general,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x", + f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16] + ); + } + +/* Polynomial picked as CRC-32-IEEE 802.3 from http://en.wikipedia.org/wiki/Cyclic_redundancy_check */ +static int frametrace_rx(char * address, int length ) __attribute__ ((unused)) ; +static int frametrace_rx(char * address, int length ) + { + int * a = (int *) address ; + int x ; + int csum32 = a[0] ; + for(x=1;x<(length/sizeof(int));x+=1) + { + csum32 = (csum32 << 1 ) ^ a[x] ^ ( (csum32 & 0x80000000) ? 0x04C11DB7 : 0 ) ; + } + TRACEN(k_t_general,"address=%p length=%d csum32=0x%08x",address,length,csum32) ; + return csum32 ; + } + +static int frametrace_tx(char * address, int length ) __attribute__ ((unused)) ; +static int frametrace_tx(char * address, int length ) + { + int * a = (int *) address ; + int x ; + int csum32 = a[0] ; + for(x=1;x<(length/sizeof(int));x+=1) + { + csum32 = (csum32 << 1 ) ^ a[x] ^ ( (csum32 & 0x80000000) ? 
0x04C11DB7 : 0 ) ; + } + TRACEN(k_t_general,"address=%p length=%d csum32=0x%08x",address,length,csum32) ; + return csum32 ; + } + +/* For diagnosis, put the local clock into the packet. Drop 4 lsbs off the 64-bit clock. */ +static unsigned int latency_timestamp(void) __attribute__ ((unused)) ; +static unsigned int latency_timestamp(void) + { + unsigned int tbu = get_tbu() ; + unsigned int tbl = get_tbl() ; + unsigned int tbu2 = get_tbu() ; + unsigned int tbl2 = (tbu==tbu2) ? tbl : 0 ; + return (tbu2 << 28) | (tbl2 >> 4) ; + } + + + +static void spot_examine_tcp_timestamp(int tsval, int tsecr) +{ + if( tsecr != 0 ) + { + int rtt=jiffies-tsecr ; + TRACEN(k_t_general,"rtt=%d",rtt) ; +#if defined(CONFIG_BGP_STATISTICS) + rtt_histogram[fls(rtt)] += 1 ; +#endif + } + if( tsval != 0 ) + { + int transit=jiffies-tsval ; + TRACEN(k_t_general,"transit=%d",transit) ; +#if defined(CONFIG_BGP_STATISTICS) + if( transit >= 0) + { + transit_histogram[fls(transit)] += 1 ; + } +#endif + } + +} + +static void spot_parse_aligned_timestamp(struct tcphdr *th) +{ + __be32 *ptr = (__be32 *)(th + 1); + int tsecr ; + int tsval ; + if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { + ++ptr; + tsval = ntohl(*ptr); + ++ptr; + tsecr = ntohl(*ptr); +#if defined(CONFIG_BGP_TORUS) + spot_examine_tcp_timestamp(tsval,tsecr) ; +#endif + } +} + +static void spot_fast_parse_options(struct sk_buff *skb, struct tcphdr *th) +{ + if (th->doff == sizeof(struct tcphdr) >> 2) { + return; + } else if ( + th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { + spot_parse_aligned_timestamp( th) ; + } +} + +static inline void analyse_tcp_flags(dma_tcp_t * dma_tcp,struct sk_buff * skb) +{ +#if defined(KEEP_TCP_FLAG_STATS) + struct ethhdr *eth = (struct ethhdr *)(skb->data) ; + struct iphdr *iph = (struct iphdr *)(eth+1) ; + unsigned int * iph_word = (unsigned int *) iph ; + struct tcphdr * tcph = (struct tcphdr *)(iph_word+(iph->ihl)) ; + 
unsigned int eth_proto = eth->h_proto ; + unsigned int ip_proto = iph->protocol ; + if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP ) + { + unsigned int flag_fin = tcph->fin ; + unsigned int flag_syn = tcph->syn ; + unsigned int flag_rst = tcph->rst ; + unsigned int flag_psh = tcph->psh ; + unsigned int flag_ack = tcph->ack ; + unsigned int flag_urg = tcph->urg ; + unsigned int flag_ece = tcph->ece ; + unsigned int flag_cwr = tcph->cwr ; + dma_tcp->tcp_received_flag_count[7] += flag_fin ; + dma_tcp->tcp_received_flag_count[6] += flag_syn ; + dma_tcp->tcp_received_flag_count[5] += flag_rst ; + dma_tcp->tcp_received_flag_count[4] += flag_psh ; + dma_tcp->tcp_received_flag_count[3] += flag_ack ; + dma_tcp->tcp_received_flag_count[2] += flag_urg ; + dma_tcp->tcp_received_flag_count[1] += flag_ece ; + dma_tcp->tcp_received_flag_count[0] += flag_cwr ; + spot_fast_parse_options(skb,tcph) ; + } + +#endif +} + +static inline int deliver_eagerly(const dma_tcp_t * dma_tcp) +{ + return dma_tcp->tuning_deliver_eagerly ; +} +/* + * Frames from a source generally arrive in the order that they left the sender, but it is possible for some + * nondeterminism to be introduced because of adaptive routing and because 'short' frames get sent 'eagerly' rather than + * with DMA. + * It is desirable to deliver frames for a given TCP session in-order, otherwise the network layer may call for a + * 'fast' retransmit (thinking that a frame has been lost). This routine defers out-of-order frames until they can be + * presented in-order. + */ +static void deliver_from_slot(dma_tcp_t * dma_tcp, unsigned int slot, unsigned int conn_id, struct sk_buff * skb) +{ + if( !
deliver_eagerly(dma_tcp)) + { + unsigned int slot_conn=get_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot) ; + unsigned int slot_advancement= (conn_id-slot_conn) & (k_concurrent_receives-1) ; + TRACEN(k_t_general,"slot=0x%08x conn_id=0x%08x slot_conn=0x%08x skb=%p slot_advancement=%d",slot,conn_id,slot_conn,skb,slot_advancement) ; + #if defined(CONFIG_BGP_STATISTICS) + dma_tcp->resequence_histogram[slot_advancement] += 1; + #endif + if( 0 == slot_advancement) + { + /* 'oldest' skb has arrived. Deliver it */ + frames_receive_torus(dma_tcp,skb) ; + /* and check if any 'arrivals ahead' can be delivered now */ + { + int x ; + struct sk_buff * slot_skb ; + for(x=1; x<k_concurrent_receives-1 && (NULL != (slot_skb = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x))); x+=1) + { + TRACEN(k_t_general,"Delivering slot=0x%08x conn_id=0x%08x skb=%p",slot,slot_conn+x,slot_skb) ; + frames_receive_torus(dma_tcp,slot_skb) ; + set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x,NULL) ; + } + set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+x) ; + } + } + else + { + struct sk_buff * slot_skb_old = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id); + TRACEN(k_t_general,"Queuing slot=0x%08x conn_id=0x%08x skb=%p skb->len=%d slot_skb_old=%p",slot,conn_id,skb,skb->len,slot_skb_old) ; + if( slot_skb_old) + { + /* Wrapped around all the possible reorder slots. Something seems to have gone missing. */ + TRACEN(k_t_error,"(E) resequence buffer wrapped, skb=%p conn_id=0x%08x. 
Delivering ",skb,conn_id) ; + /* and check if any 'arrivals ahead' can be delivered now */ + { + int x ; + struct sk_buff * slot_skb ; + for(x=0; x<k_concurrent_receives-1 && (NULL != (slot_skb = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x))); x+=1) + { + TRACEN(k_t_general,"Delivering slot=0x%08x conn_id=0x%08x skb=%p",slot,slot_conn+x,slot_skb) ; + frames_receive_torus(dma_tcp,slot_skb) ; + set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x,NULL) ; + } + set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+x) ; + slot_conn = slot_conn+x ; + } + if( 0 == ((slot_conn-conn_id) & (k_concurrent_receives-1))) + { + /* Everything is delivered ... */ + frames_receive_torus(dma_tcp,skb) ; + set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+1) ; + } + else + { + /* There's another gap, save the skb for future delivery */ + set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id,skb) ; + } + + + } + else + { + set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id,skb) ; + } + + } + } + else + { + TRACEN(k_t_general,"slot=0x%08x conn_id=0x%08x skb=%p",slot,conn_id,skb) ; + if( TRACING(k_t_sgdiag_detail)) + { + unsigned int dump_length = ( skb_headlen(skb) < 256 ) ? 
skb_headlen(skb) : 256 ; + dumpmem(skb->data, dump_length, "received skb") ; + } + frames_receive_torus(dma_tcp,skb) ; + } + +} + +static void display_pending_slot(dma_tcp_t * dma_tcp,unsigned int slot) +{ +#if defined(RESEQUENCE_ARRIVALS) + unsigned int slot_conn=get_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot) ; + int x ; + int pending_count=0; + for(x=0; x<k_concurrent_receives; x+=1) + { + struct sk_buff * skb=get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x) ; + if(skb) + { + struct ethhdr *eth = (struct ethhdr *)(skb->data) ; + struct iphdr *iph = (struct iphdr *) (eth+1) ; + unsigned int saddr=iph->saddr ; + pending_count += 1; + TRACEN(k_t_request, + "(---) Pending slot=0x%08x slot_conn=0x%02x x=%d skb=%p skb->len=%d tot_len=0x%04x saddr=%d.%d.%d.%d\n", + slot,slot_conn & (k_concurrent_receives-1),x,skb,skb->len, iph->tot_len, + saddr>>24, + (saddr >> 16) & 0xff, + (saddr >> 8) & 0xff, + saddr & 0xff + ) ; + } + } + if( pending_count >0 ) + { + TRACEN(k_t_request,"slot=0x%08x pending_count=%d",slot,pending_count) ; + } + +#endif +} + +void bgp_dma_tcp_display_pending_slots(dma_tcp_t * dma_tcp, unsigned int nodecount ) +{ + unsigned int slot ; + for( slot=0; slot<nodecount; slot+=1 ) + { + display_pending_slot(dma_tcp,slot) ; + } +} + + +static void issueInlineFrameDataSingle(dma_tcp_t * dma_tcp, + void * request , + unsigned int src_key , + int payload_bytes) + { + unsigned int pad_head = src_key & 0x0f ; + TRACEN(k_t_detail | k_t_general,"(>)(%08x)", src_key); + if( k_dumpmem_diagnostic) + { + dumpmem(request,payload_bytes,"issueInlineFrameData") ; + } + { +/* We have a packet which represents a complete frame; quite a small frame ... 
*/ + struct ethhdr *eth = (struct ethhdr *) (request+pad_head) ; + struct iphdr *iph = (struct iphdr *)(request+pad_head+sizeof(struct ethhdr)) ; + if( eth->h_proto == ETH_P_IP) + { + unsigned int totlen=iph->tot_len ; + int bytes_remaining = totlen+sizeof(struct ethhdr)+pad_head-payload_bytes ; + TRACEN(k_t_detail,"Frame total length=%d",totlen) ; + if( bytes_remaining <= 0) + { +/* Largest amount of data we might need is ... */ +/* k_injection_packet_size+k_torus_skb_alignment */ + struct sk_buff * skb = alloc_skb(k_injection_packet_size+k_torus_skb_alignment , GFP_ATOMIC); + if(skb ) + { + char * payload ; + skb_reserve(skb, k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment); + payload = skb->data ; +/* TODO: rewrite with 'memcpy' or a copy through integer regs, to avoid using FP now this is 'rare' */ +/* torus_frame_payload_load(request) ; */ +/* torus_frame_payload_store(payload) ; */ + torus_frame_payload_memcpy((torus_frame_payload *)payload,(torus_frame_payload *)request) ; + TRACEN(k_t_detail,"(=)(%08x) skb=%p payload=%p bytes_remaining=%d", src_key,skb,skb->data,bytes_remaining); + skb_reserve(skb,pad_head) ; + skb_put(skb,totlen+sizeof(struct ethhdr)) ; + analyse_tcp_flags(dma_tcp, skb) ; + deliver_from_slot(dma_tcp,-1,-1,skb) ; + } + else + { + TRACEN(k_t_protocol,"(E) (%08x) skb was null", src_key); + dma_tcp->device_stats->rx_dropped += 1; + if( k_detail_stats) + { + dma_tcp->count_no_skbuff += 1 ; + } + } + } + else + { + TRACEN(k_t_protocol,"(E) frame does not fit packet, discarded"); + dma_tcp->device_stats->rx_frame_errors += 1; + } + } + else + { + TRACEN(k_t_protocol,"Packet not IP ethhdr=[%02x:%02x:%02x:%02x:%02x:%02x][%02x:%02x:%02x:%02x:%02x:%02x](%04x)", + eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5], + eth->h_source[0],eth->h_source[1],eth->h_source[2],eth->h_source[3],eth->h_source[4],eth->h_source[5], + eth->h_proto) ; + dma_tcp->device_stats->rx_frame_errors += 1; + } 
+ } + TRACEN(k_t_detail,"(<)((%08x)", src_key); + } + +static int issueInlineFrameDataSingleActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) + { + unsigned int SW_Arg=packet_ptr->SW_Arg ; +/* enable_kernel_fp() ; // TODO: don't think this is needed nowadays */ + + issueInlineFrameDataSingle( + (dma_tcp_t *) recv_func_parm, + (void *) payload_ptr, + SW_Arg, + payload_bytes + ) ; + return 0 ; + } + +#if defined(USE_ADAPTIVE_ROUTING) +typedef struct +{ + unsigned int conn_id ; + unsigned int packet_count ; + unsigned int packets_to_go ; + int framestart_offset ; + int prev_offset ; /* For constructing 'reordering' statistics */ +} adaptive_skb_cb_t; + +static void issueInlineFrameDataAdaptive(dma_tcp_t * dma_tcp, + void * request , + unsigned int src_key , + int payload_bytes, + int Put_Offset + ) + { + unsigned int conn_id = ((unsigned int) Put_Offset) >> 25 ; + unsigned int packet_count = (((unsigned int) Put_Offset) >> 16) & 0x1ff ; + int offset_in_frame = (Put_Offset & 0xfff0) | 0xffff0000 ; + unsigned int node_slot_mask=dma_tcp->node_slot_mask ; + rcv_t *rcvdemux = &dma_tcp->rcvdemux ; + unsigned int slot = (src_key >> 4) & node_slot_mask ; + unsigned int pad_head = src_key & 0x0f ; + struct sk_buff * candidate_skb=get_rcv_skb_for_conn(rcvdemux,slot,conn_id) ; + TRACEN(k_t_detail, + "(>) request=%p slot=%08x pad_head=0x%08x payload_bytes=0x%02x Put_Offset=0x%08x\n", + request,slot,pad_head,payload_bytes,Put_Offset); + if( candidate_skb) + { + adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ; + if(askb->conn_id != conn_id || askb->packet_count != packet_count) + { + TRACEN(k_t_error,"(E) askb mismatch, slot=%08x askb->conn_id=%04x conn_id=%04x askb->packet_count=%04x packet_count=%04x askb->packets_to_go=%04x", + slot,askb->conn_id,conn_id,askb->packet_count,packet_count,askb->packets_to_go) ; + dev_kfree_skb(candidate_skb) ; + candidate_skb = NULL ; + } + } + if( 
NULL == candidate_skb) + { + instrument_flow(dma_tcp,k_receive_eager) ; + candidate_skb=alloc_skb(packet_count*k_injection_packet_size+2*k_torus_skb_alignment+k_injection_packet_size,GFP_ATOMIC) ; /* TODO: refine the size */ + if( candidate_skb) + { + adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ; + askb->conn_id = conn_id ; + askb->packet_count = packet_count ; + askb->packets_to_go = packet_count ; + askb->framestart_offset = 0 ; + askb->prev_offset = -65536 ; + skb_reserve(candidate_skb, (k_torus_skb_alignment - ((unsigned int)(candidate_skb->data)) % k_torus_skb_alignment)); + skb_put(candidate_skb,packet_count*k_injection_packet_size) ; + } + else + { + TRACEN(k_t_error,"skbuff allocation failed packet_count=%d slot=0x%08x conn_id=0x%08x",packet_count,slot,conn_id) ; + } + set_rcv_skb_for_conn(rcvdemux,slot,conn_id,candidate_skb) ; + } + if( candidate_skb) + { + unsigned char * end_of_frame=candidate_skb->tail ; + unsigned char * target = end_of_frame+offset_in_frame ; + int cand_start_offset = offset_in_frame + pad_head ; + TRACEN(k_t_detail,"candidate_skb skb=%p head=%p data=%p tail=%p end=%p offset_in_frame=0x%08x target=%p cand_start_offset=0x%08x", + candidate_skb,candidate_skb->head,candidate_skb->data,candidate_skb->tail,candidate_skb->end,offset_in_frame,target,cand_start_offset) ; + if( target < candidate_skb->head) + { + TRACEN(k_t_error,"data offset outside skb, dropping packet") ; + } + else + { + adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ; + int new_packets_to_go=askb->packets_to_go - 1 ; + int prev_offset = askb->prev_offset ; +#if defined(USE_ADAPTIVE_ROUTING) +/* Statistics, count how often a packet came out-of-order */ + if( offset_in_frame < prev_offset) + { + instrument_flow(dma_tcp,k_reordered) ; + } + askb->prev_offset = offset_in_frame ; +#endif + if( cand_start_offset < askb->framestart_offset ) + { + askb->framestart_offset=cand_start_offset ; + } + + TRACEN(k_t_detail,"memcpy(%p,%p,0x%08x) 
new_packets_to_go=%d", + target,request,payload_bytes,new_packets_to_go) ; + if( payload_bytes == k_injection_packet_size) + { + /* doublehummer memcpy optimisation for 'full' packet */ + /* TODO: rewrite with 'memcpy' or a copy through integer regs, to avoid using FP now this is 'rare' */ + torus_frame_payload_memcpy((torus_frame_payload *)target,(torus_frame_payload *)request) ; + } + else + { + memcpy(target,request,payload_bytes) ; + } + if( new_packets_to_go <= 0) + { + analyse_tcp_flags(dma_tcp, candidate_skb) ; + skb_reserve(candidate_skb,packet_count*k_injection_packet_size+askb->framestart_offset); + dumpframe(candidate_skb->data,candidate_skb->len,"Proposed frame") ; + deliver_from_slot(dma_tcp,slot,conn_id,candidate_skb) ; + set_rcv_skb_for_conn(rcvdemux,slot,conn_id,NULL) ; + } + else + { + askb->packets_to_go = new_packets_to_go ; + } + } + } + else + { + TRACEN(k_t_error,"(E) No memory for skb, dropping packet") ; + } + + } + +static int issueInlineFrameDataAdaptiveActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) + { + unsigned int SW_Arg=packet_ptr->SW_Arg ; + int Put_Offset=packet_ptr->Put_Offset ; +/* enable_kernel_fp() ; // TODO: don't think this is needed nowadays */ + + issueInlineFrameDataAdaptive( + (dma_tcp_t *) recv_func_parm, + (void *) payload_ptr, + SW_Arg, + payload_bytes, + Put_Offset + ) ; + return 0 ; + } +#endif + +#if defined(AUDIT_FRAME_HEADER) + +frame_header_t all_headers_in_counters[DMA_NUM_COUNTERS_PER_GROUP] ; +#endif +unsigned int tot_len_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ; + +static inline void create_dma_descriptor_propose_accept(dma_tcp_t *dma_tcp, + void * address, + unsigned int length, + unsigned int x, unsigned int y, unsigned int z, + unsigned int proto, + unsigned int SW_Arg, + unsigned int conn_id, + unsigned int tag, + DMA_InjDescriptor_t *desc, + unsigned int propose_length + ) +{ + dma_addr_t dataAddr ; + int ret1 ; + int 
PutOffset = (conn_id << 25) | (tag << 16) | ((-length) & 0xfff0) ; + TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d proto=%d desc=%p",address,length,x,y,z,proto,desc); + dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE); + ret1 = DMA_TorusMemFifoDescriptor( desc, + x, y, z, + k_ReceptionFifoGroup, /* recv fifo grp id */ + 0, /* hints */ + virtual_channel(dma_tcp,k_VC_anyway), /* vc - adaptive */ + SW_Arg, /* softw arg */ + proto, /* function id */ + k_InjectionCounterGroup, /* inj cntr group id */ + k_injCounterId, /* inj counter id */ + dataAddr, /* send address */ + propose_length /* proposal length */ + ); + if(ret1 != 0 ) + { + TRACEN(k_t_error,"(E) ret1=%d",ret1) ; + } + + DMA_DescriptorSetPutOffset(desc,PutOffset) ; /* For 'memory FIFO packets', the put offset has no hardware use. Set it to pass required data to receive actor */ + + TRACEN(k_t_general , "(<) ret1=%d",ret1); + +} + +static inline unsigned int ethhdr_src_x(struct ethhdr * eth) +{ + return eth->h_source[3] ; +} +static inline unsigned int ethhdr_src_y(struct ethhdr * eth) +{ + return eth->h_source[4] ; +} +static inline unsigned int ethhdr_src_z(struct ethhdr * eth) +{ + return eth->h_source[5] ; +} + +static inline unsigned int ethhdr_dest_x(struct ethhdr * eth) +{ + return eth->h_dest[3] ; +} +static inline unsigned int ethhdr_dest_y(struct ethhdr * eth) +{ + return eth->h_dest[4] ; +} +static inline unsigned int ethhdr_dest_z(struct ethhdr * eth) +{ + return eth->h_dest[5] ; +} + +#if defined(USE_SKB_TO_SKB) +static int get_reception_counter(dma_tcp_t * dma_tcp) +{ + unsigned int counters_available = dma_tcp->qtyFreeRecCounters ; + if( counters_available > 0) + { + int cx ; + int scanRecCounter=dma_tcp->scanRecCounter ; + dma_tcp->qtyFreeRecCounters=counters_available-1 ; + for(cx=0;cx<DMA_NUM_COUNTERS_PER_GROUP;cx+=1) + { + int cxx=(scanRecCounter+cx) & (DMA_NUM_COUNTERS_PER_GROUP-1) ; + if(0 == dma_tcp->recCntrInUse[cxx]) + { + 
dma_tcp->scanRecCounter=cxx+1 ; + dma_tcp->recCntrInUse[cxx] = 1 ; + return cxx ; + } + } + TRACEN(k_t_error,"(E) Should have been %d counters available",counters_available) ; + } + return -1 ; /* No reception counters available */ +} + +enum { + k_PSKB_noRecCounter = 0x01 , + k_PSKB_freedRecCounter = 0x02 +}; +typedef struct +{ + unsigned int src_key ; + unsigned int slot ; + unsigned int conn_id ; + unsigned short tot_len ; + unsigned char pad_head ; +} propose_skb_cb ; + +/* Frame injection control, may live in skb->cb . */ +/* 'desc' describes the 'non-fragmented' initial part of the skb data; code where the ficb is used will */ +/* handle what has to happen to get the 'fragmented' part of the skb sent out */ +enum { + k_cattle_class, + k_first_class +}; + +static int bgp_dma_tcp_s_and_f_frames_prepared( + dma_tcp_t *dma_tcp, + struct sk_buff *skb, + unsigned int queue_at_head, + unsigned int transport_class + ) ; + +static int isProp(dma_tcp_t * dma_tcp,struct ethhdr *eth,struct iphdr *iph) +{ + int h_source_x=eth->h_source[3] ; + int h_source_y=eth->h_source[4] ; + int h_source_z=eth->h_source[5] ; + int my_x=dma_tcp->location.coordinate[0] ; + int my_y=dma_tcp->location.coordinate[1] ; + int my_z=dma_tcp->location.coordinate[2] ; + + if( h_source_x == my_x && h_source_y == my_y && h_source_z == my_z ) + { + TRACEN(k_t_general,"non-propose from (%d,%d,%d)",eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ; + return 0 ; + } + return 1 ; +} + +static int bgp_dma_tcp_s_and_f_frames_prepared( + dma_tcp_t *dma_tcp, + struct sk_buff *skb, + unsigned int queue_at_head, + unsigned int transport_class + ) ; + +struct accepthdr { + struct iphdr iph ; + unsigned int conn_id ; + int reception_counter ; +}; + +static inline void create_dma_descriptor_direct_put_offset(dma_tcp_t *dma_tcp, + unsigned int x, unsigned int y, unsigned int z, + int injection_counter, + int reception_counter, + dma_addr_t dataAddr, + int msglen, + DMA_InjDescriptor_t *desc, + unsigned int offset + 
) ; + +static void display_iphdr(struct iphdr *iph) +{ + TRACEN(k_t_request,"iphdr tot_len=0x%04x saddr=0x%08x daddr=0x%08x",iph->tot_len,iph->saddr,iph->daddr) ; +} + +static unsigned int counted_length(struct sk_buff *skb) +{ + unsigned int rc=skb_headlen(skb) ; + int f ; + int nfrags = skb_shinfo(skb)->nr_frags ; + struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[0] ; + for(f=0; f<nfrags; f+=1) + { + rc += frag[f].size ; + } + return rc ; + +} + +static int audit_skb_at_accept(dma_tcp_t * dma_tcp,struct sk_buff *skb, unsigned int totlen_at_propose, struct iphdr *iph_at_rcv) +{ + unsigned int ctlen = counted_length(skb) ; + if( totlen_at_propose == 0 || totlen_at_propose > dma_tcp->mtu || totlen_at_propose != iph_at_rcv->tot_len || totlen_at_propose +sizeof(struct ethhdr) != ctlen) + { + TRACEN(k_t_error,"(E) skb=%p inconsistent, totlen_at_propose=0x%04x iph_at_rcv->tot_len=0x%04x skb->data_len=0x%04x counted_length(skb)=0x%04x", + skb, totlen_at_propose, iph_at_rcv->tot_len, skb->data_len, ctlen + ) ; + display_skb_structure(skb) ; + display_iphdr(iph_at_rcv) ; + instrument_flow(dma_tcp,k_accept_audit_fail) ; + return 1 ; + } + return 0 ; +} +void issue_accept(dma_tcp_t * dma_tcp,struct accepthdr * accepth, unsigned int src_key ) +{ + unsigned int conn_id=accepth->conn_id ; + int reception_counter=accepth->reception_counter ; + unsigned int node_slot_mask=dma_tcp->node_slot_mask ; + unsigned int slot = (src_key >> 4) & node_slot_mask ; + struct sk_buff *skb=get_tx_skb(&dma_tcp->tx_mux,slot,conn_id) ; + TRACEN(k_t_general,"src_key=0x%08x conn_id=0x%08x reception_counter=0x%08x",src_key,conn_id,reception_counter) ; + instrument_flow(dma_tcp,k_act_accept_rpc) ; + if( skb) + { + struct ethhdr* eth = (struct ethhdr*)(skb->data) ; + unsigned int x=ethhdr_dest_x(eth) ; + unsigned int y=ethhdr_dest_y(eth) ; + unsigned int z=ethhdr_dest_z(eth) ; + frame_injection_cb *ficb = (frame_injection_cb *) skb->cb ; + unsigned int payload_length = skb_headlen(skb) ; + 
unsigned int payload_address = (unsigned int)(skb->data) ; + unsigned int pad_head = payload_address & 0x0f ; + unsigned int aligned_payload_length = payload_length + pad_head ; + dma_addr_t dataAddr = dma_map_single(NULL, skb->data-pad_head, aligned_payload_length, DMA_TO_DEVICE); + + set_tx_skb(&dma_tcp->tx_mux,slot,conn_id,NULL) ; + TRACEN(k_t_general,"Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb=%p x=%d y=%d z=%d msglen=0x%04x", + slot,conn_id,reception_counter,skb, x,y,z,payload_length+pad_head) ; + if(TRACING(k_t_sgdiag)) + { + TRACEN(k_t_sgdiag,"Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb=%p x=%d y=%d z=%d msglen=0x%04x", + slot,conn_id,reception_counter,skb, x,y,z,payload_length+pad_head) ; + diag_skb_structure(skb) ; + } +#if defined(AUDIT_HEADLEN) + { + int rca = audit_skb_at_accept(dma_tcp,skb,ficb->tot_len,&accepth->iph) ; + if( rca) + { + TRACEN(k_t_error,"(!!!) dropping skb, will cause (x=%d y=%d z=%d) counter 0x%02x to leak", x,y,z,reception_counter) ; + dev_kfree_skb(skb) ; + return ; + } + } +#endif + { + int transfer_length = k_abbreviate_headlen ? (payload_length+pad_head-eth->h_source[0]) : (payload_length+pad_head) ; + dma_addr_t transfer_address = k_abbreviate_headlen ? (dataAddr+eth->h_source[0]) : dataAddr ; + unsigned int receive_offset = k_abbreviate_headlen ? 
eth->h_source[0] : 0 ; + if( 0 != transfer_length) + { + create_dma_descriptor_direct_put_offset( + dma_tcp,x, y, z,k_injCounterId,reception_counter,transfer_address,transfer_length,&ficb->desc,receive_offset + ) ; + } + else + { + TRACEN(k_t_general,"(I) head length is zero") ; + /* Set up a descriptor for a non-zero length, then set its length to zero so that code later on can pick up the special case */ + create_dma_descriptor_direct_put_offset( + dma_tcp,x, y, z,k_injCounterId,reception_counter,transfer_address,1,&ficb->desc,receive_offset + ) ; + ficb->desc.msg_length = 0 ; + instrument_flow(dma_tcp,k_headlength_zero) ; + } + } + ficb->free_when_done=1 ; + bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp, skb, 0, k_first_class) ; + + } + else + { + TRACEN(k_t_error,"(E) Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb is null", + slot,conn_id,reception_counter ) ; + } +} + +static int should_park(dma_tcp_t * dma_tcp,unsigned int proposals_active, unsigned int x0, unsigned int y0, unsigned int z0) +{ + unsigned int free_counters = dma_tcp->qtyFreeRecCounters ; + unsigned int tuning_counters_per_source = dma_tcp->tuning_counters_per_source ; +/* unsigned int reported_transmission_fifo = report_transmission_fifo(dma_tcp,x0,y0,z0) ; */ + return ( tuning_counters_per_source > 0 ) + ? (proposals_active > tuning_counters_per_source ) + : ((proposals_active > 1) && (proposals_active * proposals_active > free_counters )) ; +} + +static void stamp_skb(struct sk_buff *skb, unsigned int size ) +{ + if( skb->data + size <= skb->end) + { + memset(skb->data,0x11,size) ; + } + else + { + TRACEN(k_t_error,"(E) Stamp for 0x%08x bytes out of range, skb=%p head=%p data=%p tail=%p end=%p, skipped", + size,skb,skb->head,skb->data,skb->tail,skb->end) ; + } +} + +static inline int defer_skb_for_counter(const dma_tcp_t * dma_tcp) +{ + return k_allow_defer_skb_for_counter ? 
dma_tcp->tuning_defer_skb_until_counter : 0 ; +} +static void receive_skb_using_counter(dma_tcp_t *dma_tcp,struct sk_buff *skb_next, unsigned int counter_index, + unsigned int pad_head, unsigned int slot, unsigned int conn_id, + unsigned int x, unsigned int y,unsigned int z, + unsigned int tot_len, + unsigned int src_key) ; +static void pending_rcv_skb_queue(dma_tcp_t *dma_tcp, struct sk_buff * skb, unsigned int x0, unsigned int y0, unsigned int z0 ) +{ +/* if( 1 == dma_tcp->tuning_select_fifo_algorithm) */ +/* { */ +/* skb_queue_tail(&dma_tcp->balancer.b[k_pending_rcv_skb_classes-1].pending_rcv_skbs,skb) ; */ +/* } */ +/* else */ +/* { */ + unsigned int reported_fifo=report_transmission_fifo(dma_tcp,x0,y0,z0) ; + TRACEN(k_t_general,"skb=%p would come from fifo=%d on node [%d,%d,%d]",skb,reported_fifo,x0,y0,z0) ; + if( reported_fifo < k_pending_rcv_skb_classes) + { + skb_queue_tail(&dma_tcp->balancer.b[reported_fifo].pending_rcv_skbs,skb) ; + } + else + { + TRACEN(k_t_error,"(!!!) skb=%p would come from fifo=%d on node [%d,%d,%d] (out of range)",skb,reported_fifo,x0,y0,z0) ; + skb_queue_tail(&dma_tcp->balancer.b[0].pending_rcv_skbs,skb) ; + } +/* } */ +} + +static inline int over_quota(bgp_dma_balancer_direction *b) +{ + int ql = skb_queue_len(&b->pending_rcv_skbs) ; + return ql ? 
b->outstanding_counters : 0x7fffffff ; +} +static struct sk_buff* pending_rcv_skb_dequeue(dma_tcp_t *dma_tcp) +{ + unsigned int q=0 ; + int qq=over_quota(dma_tcp->balancer.b+0) ; + int x ; + for(x=1;x<k_pending_rcv_skb_classes;x+=1) + { + int qp=over_quota(dma_tcp->balancer.b+x) ; + if( qp < qq) + { + qq=qp ; + q=x ; + } + } + return skb_dequeue(&dma_tcp->balancer.b[q].pending_rcv_skbs) ; +} + +static void issueProp(dma_tcp_t * dma_tcp, + void * request , + unsigned int src_key , + int payload_bytes, + int Put_Offset + ) + { + unsigned int conn_id = ((unsigned int) Put_Offset) >> 25 ; + unsigned int node_slot_mask=dma_tcp->node_slot_mask ; + unsigned int slot = (src_key >> 4) & node_slot_mask ; + unsigned int pad_head = src_key & 0x0f ; + + struct ethhdr *eth = (struct ethhdr *)(request+pad_head) ; + unsigned int eth_proto = eth->h_proto ; + + struct iphdr *iph = (struct iphdr *) (eth+1) ; + unsigned int tot_len=iph->tot_len ; + if( isProp(dma_tcp,eth,iph)) + { + unsigned int x=ethhdr_src_x(eth) ; + unsigned int y=ethhdr_src_y(eth) ; + unsigned int z=ethhdr_src_z(eth) ; + rcv_t *rcvdemux = &dma_tcp->rcvdemux ; + unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ; + instrument_flow(dma_tcp,k_act_propose_rpc) ; + set_proposals_active(rcvdemux,slot,proposals_active+1) ; + /* If we're flow controlling by counters, we have a choice here. */ + /* We can either get on with it, or park it for later when a previously-started frame completes */ + if( 0 == k_counter_flow_control || ! should_park(dma_tcp,proposals_active,x,y,z) ) + { + int reception_counter=get_reception_counter(dma_tcp) ; + TRACEN(k_t_general|k_t_sgdiag,"Prop from slot=0x%08x conn_id=0x%04x eth_proto=0x%04x pad_head=0x%02x tot_len=0x%04x x=0x%02x y=0x%02x z=0x%02x msglen=0x%04x payload_bytes=0x%02x", slot,conn_id,eth_proto,pad_head,tot_len, x, y, z,tot_len+pad_head, payload_bytes) ; + + /* Now we need an 'skbuff' and a reception counter. 
Reception counters might be scarce */ + if( reception_counter != -1 ) + { + unsigned int allocation_size=tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment ;/* TODO: refine the size */ + struct sk_buff *skb = alloc_skb((allocation_size > 256) ? allocation_size : 256, GFP_ATOMIC) ; /* TODO: refine the size */ + if( skb) + { + if(k_scattergather_diagnostic) stamp_skb(skb,tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ; + skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head); + /* Bring in the frame header for diagnosis later ... */ + memcpy(skb->data-pad_head,request,payload_bytes) ; + skb_put(skb,tot_len+sizeof(struct ethhdr)) ; + if( k_scattergather_diagnostic) display_skb_structure(skb) ; + { + receive_skb_using_counter(dma_tcp,skb,reception_counter,pad_head,slot,conn_id,x,y,z,tot_len,src_key) ; + } + } + else + { + TRACEN(k_t_error,"(E) No memory available for skbuff") ; + } + } + else + { + unsigned int allocation_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes+2*k_torus_skb_alignment) : (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ; + unsigned int put_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes-pad_head) : (tot_len+sizeof(struct ethhdr)) ; + /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */ + struct sk_buff *skb = alloc_skb((allocation_size > 256) ? allocation_size : 256, GFP_ATOMIC) ; /* TODO: refine the size */ + TRACEN(k_t_general,"allocation_size=0x%04x put_size=0x%04x skb=%p",allocation_size,put_size,skb) ; + instrument_flow(dma_tcp, k_no_reception_counter) ; + if( skb) + { + if(k_scattergather_diagnostic) stamp_skb(skb,allocation_size) ; + skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head); + /* Bring in the frame header for diagnosis later ... 
*/ + memcpy(skb->data-pad_head,request,payload_bytes) ; + skb_put(skb,put_size) ; + if( k_scattergather_diagnostic) display_skb_structure(skb) ; + { + propose_skb_cb * pskbcb = (propose_skb_cb *)skb->cb ; + pskbcb->src_key=src_key ; + pskbcb->slot = slot ; + pskbcb->conn_id = conn_id ; + pskbcb->tot_len = tot_len ; + pskbcb->pad_head = pad_head ; + } + instrument_flow(dma_tcp,k_defer_accept_rpc_counters) ; + pending_rcv_skb_queue(dma_tcp,skb,x,y,z) ; + TRACEN(k_t_flowcontrol|k_t_general,"No reception counters (%d,%d,%d) skb=%p src_key=0x%08x slot=0x%08x conn_id=0x%08x tot_len=0x%04x pad_head=0x%02x",x,y,z,skb,src_key,slot,conn_id,tot_len,pad_head) ; + } + else + { + TRACEN(k_t_error,"(E) No memory available for skbuff") ; + } + } + } + else + { + /* Park the 'propose' until a previous frame from this node completes */ + + unsigned int allocation_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes+2*k_torus_skb_alignment) : (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ; + unsigned int put_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes-pad_head) : (tot_len+sizeof(struct ethhdr)) ; + /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */ + struct sk_buff *skb = alloc_skb(allocation_size, GFP_ATOMIC) ; /* TODO: refine the size */ + TRACEN(k_t_general,"allocation_size=0x%04x put_size=0x%04x skb=%p",allocation_size,put_size,skb) ; + instrument_flow(dma_tcp, k_parked) ; + if( skb) + { + if(k_scattergather_diagnostic) stamp_skb(skb,allocation_size) ; + skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head); + /* Bring in the frame header for diagnosis later ... 
*/ + memcpy(skb->data-pad_head,request,payload_bytes) ; + skb_put(skb,put_size) ; + if( k_scattergather_diagnostic) display_skb_structure(skb) ; + { + propose_skb_cb * pskbcb = (propose_skb_cb *)skb->cb ; + pskbcb->src_key=src_key ; + pskbcb->slot = slot ; + pskbcb->conn_id = conn_id ; + pskbcb->tot_len = tot_len ; + pskbcb->pad_head = pad_head ; + } + instrument_flow(dma_tcp,k_defer_accept_rpc_nodeflow) ; + enq_pending_flow(&dma_tcp->rcvdemux,slot,skb) ; + TRACEN(k_t_general,"Flow control (%d,%d,%d) skb=%p src_key=0x%08x slot=0x%08x conn_id=0x%08x tot_len=0x%04x pad_head=0x%02x proposals_active=%d qtyFreeRecCounters=%d",x,y,z,skb,src_key,slot,conn_id,tot_len,pad_head,proposals_active,dma_tcp->qtyFreeRecCounters) ; + } + else + { + TRACEN(k_t_error,"(E) No memory available for skbuff") ; + } + } + } + else + { + /* an 'accept' packet sent as a modified 'propose' ... */ + struct accepthdr * accepth=(struct accepthdr *)(eth+1) ; + TRACEN(k_t_general,"'accept' src_key=0x%08x",src_key) ; + issue_accept(dma_tcp,accepth,src_key) ; + } + } + +static int issuePropActor(DMA_RecFifo_t *f_ptr, + DMA_PacketHeader_t *packet_ptr, + void *recv_func_parm, + char *payload_ptr, + int payload_bytes + ) + { + unsigned int SW_Arg=packet_ptr->SW_Arg ; + int Put_Offset=packet_ptr->Put_Offset ; + + issueProp( + (dma_tcp_t *) recv_func_parm, + (void *) payload_ptr, + SW_Arg, + payload_bytes, + Put_Offset + ) ; + return 0 ; + } +typedef struct +{ + unsigned int reception_counter ; + unsigned char x, y, z ; +} accept_skb_cb ; + +static inline void create_dma_descriptor_direct_put_offset(dma_tcp_t *dma_tcp, + unsigned int x, unsigned int y, unsigned int z, + int injection_counter, + int reception_counter, + dma_addr_t dataAddr, + int msglen, + DMA_InjDescriptor_t *desc, + unsigned int offset + ) +{ + int ret1 __attribute((unused)); + TRACEN(k_t_general|k_t_sgdiag , "(>) injecting x=%d y=%d z=%d injection_counter=0x%02x reception_counter=0x%02x dataAddr=0x%08llx msglen=0x%08x desc=%p 
offset=0x%04x", + x,y,z,injection_counter,reception_counter,dataAddr,msglen,desc,offset); + ret1 = DMA_TorusDirectPutDescriptor( desc, + x, y, z, + 0, /* hints */ + virtual_channel(dma_tcp,k_VC_anyway), /* vc - adaptive */ + k_InjectionCounterGroup, /* inj cntr group id */ + injection_counter, /* inj counter id */ + dataAddr, /* send offset */ + 0, /* rec ctr grp */ + reception_counter, + offset, /* reception offset */ + msglen /* message length */ + ); + TRACEN(k_t_general , "(<) ret1=%d",ret1); + +} + +#endif + +static void receive_skb_using_counter(dma_tcp_t *dma_tcp,struct sk_buff *skb_next, unsigned int counter_index, + unsigned int pad_head, unsigned int slot, unsigned int conn_id, + unsigned int x, unsigned int y,unsigned int z, + unsigned int tot_len, + unsigned int src_key) +{ + struct ethhdr* eth=(struct ethhdr *)(skb_next->data) ; + dma_addr_t dataAddr = dma_map_single(NULL, skb_next->data-pad_head, skb_next->len+pad_head, DMA_FROM_DEVICE); + frame_injection_cb * ficb = (frame_injection_cb *) skb_next->cb ; + unsigned int counter_base=dataAddr>>4 ; + unsigned int counter_max=((dataAddr+tot_len+pad_head+sizeof(struct ethhdr)) >> 4)+1 ; + unsigned int propose_len = eth->h_source[0] ; + unsigned int dma_count = k_abbreviate_headlen ? 
(skb_next->len+pad_head-propose_len) : (skb_next->len+pad_head) ; + +#if defined(AUDIT_FRAME_HEADER) + memcpy(all_headers_in_counters+counter_index,skb_next->data,sizeof(frame_header_t)) ; +#endif + + dma_tcp->balancer.b[report_transmission_fifo(dma_tcp,x,y,z)].outstanding_counters += 1 ; + + dma_tcp->slot_for_rcv[counter_index]=slot ; + dma_tcp->conn_for_rcv[counter_index]=conn_id | 0x80 ; /* Mark it up as having been delayed */ + TRACEN(k_t_general|k_t_scattergather|k_t_sgdiag,"Reception counter 0x%02x [%08x %08x %08x] assigned to (%d,%d,%d) conn_id=0x%08x skb=%p propose_len=0x%02x", + counter_index,dma_count,counter_base,counter_max,x,y,z,conn_id,skb_next,propose_len) ; + ficb->free_when_done = 0 ; + + dma_tcp->rcv_skbs[counter_index] = skb_next ; + dma_tcp->rcv_timestamp[counter_index] = jiffies ; + { + unsigned int proposed_dma_length = tot_len+pad_head+sizeof(struct ethhdr) ; + unsigned int available_skb_length = skb_next->end - (skb_next->data-pad_head) ; + if( proposed_dma_length > available_skb_length ) + { + TRACEN(k_t_error,"(!!!) 
skb=%p not big enough, dma=0x%08x bytes, pad_head=0x%02x, skb(head=%p data=%p tail=%p end=%p)", + skb_next,proposed_dma_length,pad_head,skb_next->head,skb_next->data,skb_next->tail,skb_next->end + ) ; + show_stack(NULL,NULL) ; + } + } + DMA_CounterSetValueBaseMaxHw(dma_tcp->recCounterGroup.counter[counter_index].counter_hw_ptr,dma_count,dataAddr >> 4, ((dataAddr+tot_len+pad_head+sizeof(struct ethhdr)) >> 4)+1) ; + instrument_flow(dma_tcp,k_send_accept_rpc) ; + { + /* Push out a 'reverse propose' frame, adjust it so it overlays the area beyond the initial frame which will be replaced by the response DMA */ + struct iphdr* iph = (struct iphdr*)(eth+1) ; + struct ethhdr* accept_eth0 = (struct ethhdr *)(iph+1) ; + struct ethhdr* accept_eth = (struct ethhdr *)(skb_next->data-pad_head+propose_len) ; + struct accepthdr * accepth=(struct accepthdr *)(accept_eth+1) ; + TRACEN(k_t_general,"accept_eth0=%p accepth=%p",accept_eth0,accept_eth) ; + tot_len_for_rcv[counter_index] = iph->tot_len ; // For diagnostics if the torus hangs + memcpy(accept_eth,eth,sizeof(struct ethhdr)) ; + memcpy(&accepth->iph,iph,sizeof(iph)) ; // TODO: Diagnose the apparent 'scribble' at the sender, then take this away + accepth->conn_id=conn_id ; + accepth->reception_counter=counter_index ; + if( (unsigned int)(accepth+1) > (unsigned int)(skb_next->end)) + { + TRACEN(k_t_error,"(!!!) 
skb=%p not big enough, (accepth+1)=%p, skb(head=%p data=%p tail=%p end=%p)", + skb_next,accepth+1,skb_next->head,skb_next->data,skb_next->tail,skb_next->end + ) ; + show_stack(NULL,NULL) ; + + } + TRACEN(k_t_general,"accept_eth=%p accepth=%p src_key=0x%08x conn_id=0x%08x counter_index=0x%08x",accept_eth,accepth,src_key,conn_id,counter_index) ; + create_dma_descriptor_propose_accept(dma_tcp, + (void *)(accept_eth), + 48, + x,y, z, + dma_tcp->proto_transfer_propose, + (dma_tcp->src_key << 4), + conn_id, + 0, + &ficb->desc, + 48 + ) ; + DMA_CounterSetEnableById(&dma_tcp->recCounterGroup,counter_index) ; + bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,skb_next,0, k_first_class) ; + } + +} + +static void handle_empty_recCounter_deliver(dma_tcp_t *dma_tcp, unsigned int counter_index) +{ + rcv_t *rcvdemux = &dma_tcp->rcvdemux ; + struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ; + unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ; + unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ; + set_proposals_active(rcvdemux,slot,proposals_active-1) ; + TRACEN(k_t_general|k_t_sgdiag,"counter_index=0x%02x skb=%p",counter_index,skb) ; + if( skb) + { +#if defined(AUDIT_FRAME_HEADER) + if(memcmp(skb->data,((char *)(all_headers_in_counters+counter_index)),32)) + { + TRACEN(k_t_request,"(!!!) 
header not as first seen") ; + dumpmem(skb->data,sizeof(frame_header_t),"header-now") ; + dumpmem(all_headers_in_counters+counter_index,sizeof(frame_header_t),"header-in-propose") ; + + } +#endif + + { + struct ethhdr *eth=(struct ethhdr *)(skb->data) ; + unsigned int x=ethhdr_src_x(eth) ; + unsigned int y=ethhdr_src_y(eth) ; + unsigned int z=ethhdr_src_z(eth) ; + eth->h_source[0] = eth->h_dest[0] ; // Replug the item that got taken for DMA sideband + dma_tcp->balancer.b[report_transmission_fifo(dma_tcp,x,y,z)].outstanding_counters -= 1 ; + } + deliver_from_slot(dma_tcp,slot,dma_tcp->conn_for_rcv[counter_index],skb) ; + } + else + { + TRACEN(k_t_error,"(E) counter_index=0x%02x no skbuff, slot=0x%08x proposals_active=%d",counter_index,slot,proposals_active) ; + } + +} + +static void handle_empty_recCounter_flush(dma_tcp_t *dma_tcp, unsigned int counter_index) +{ + rcv_t *rcvdemux = &dma_tcp->rcvdemux ; + struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ; + unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ; + unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ; + unsigned int counter_value = DMA_CounterGetValueNoMsync(dma_tcp->recCounterGroup.counter+counter_index) ; + set_proposals_active(rcvdemux,slot,proposals_active-1) ; + TRACEN(k_t_request,"(!!!) flushing counter_index=0x%02x skb=%p",counter_index,skb) ; + DMA_CounterSetDisableById(&dma_tcp->recCounterGroup,counter_index) ; + dma_tcp_show_reception_one(dma_tcp,counter_index,counter_value) ; + if( skb) + { +#if defined(AUDIT_FRAME_HEADER) + if(memcmp(skb->data,((char *)(all_headers_in_counters+counter_index)),32)) + { + TRACEN(k_t_request,"(!!!) 
header not as first seen") ; + dumpmem(skb->data,sizeof(frame_header_t),"header-now") ; + dumpmem(all_headers_in_counters+counter_index,sizeof(frame_header_t),"header-in-propose") ; + + } +#endif + dev_kfree_skb(skb) ; + } + else + { + TRACEN(k_t_error,"(E) counter_index=0x%02x no skbuff, slot=0x%08x proposals_active=%d",counter_index,slot,proposals_active) ; + } + +} + +static void handle_empty_recCounter_reload(dma_tcp_t *dma_tcp, unsigned int counter_index, unsigned int x0, unsigned int y0, unsigned int z0) +{ + rcv_t *rcvdemux = &dma_tcp->rcvdemux ; + struct sk_buff * skb_next ; + unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ; + unsigned int proposals_active=get_proposals_active(rcvdemux,slot)+1 ; + if( k_counter_flow_control ) + { + /* We're going to get a queued frame, but which queue we try first will depend on whether this source */ + /* is over quota at the moment */ + if (proposals_active > count_pending_flow(rcvdemux,slot)+1 && should_park(dma_tcp,proposals_active,x0,y0,z0)) + { + /* If we have a 'queued' frame, take that */ + skb_next = pending_rcv_skb_dequeue(dma_tcp) ; + TRACEN(k_t_general,"skb_next=%p",skb_next) ; + if( ! skb_next) + { + /* Try a 'parked' frame */ + skb_next=deq_pending_flow(rcvdemux,slot) ; + } + + } + else + { + /* If we have a 'parked' frame from the same source, get it moving now */ + skb_next=deq_pending_flow(rcvdemux,slot) ; + TRACEN(k_t_general,"skb_next=%p",skb_next) ; + if( ! 
skb_next) + { + /* If nothing 'parked', try the general queue */ + skb_next = pending_rcv_skb_dequeue(dma_tcp) ; + } + + } + } + else + { + skb_next = pending_rcv_skb_dequeue(dma_tcp) ; + } + if( skb_next) + { + /* A request was waiting for a receive counter, which is now available */ + propose_skb_cb * pskcb = (propose_skb_cb *)skb_next->cb ; + unsigned int src_key=pskcb->src_key ; + struct ethhdr* eth=(struct ethhdr *)(skb_next->data) ; + unsigned int x=ethhdr_src_x(eth) ; + unsigned int y=ethhdr_src_y(eth) ; + unsigned int z=ethhdr_src_z(eth) ; + unsigned int slot=pskcb->slot ; + unsigned int conn_id=pskcb->conn_id ; + unsigned int pad_head=pskcb->pad_head ; + unsigned int tot_len=pskcb->tot_len ; + if( defer_skb_for_counter(dma_tcp)) + { + /* Need a new sk_buff; need to set up alignment */ + /* TODO: shouldn't need alignment */ + /* TODO: Copy in the data from the old skbuff, so that the DMA doesn't need to resend it */ + unsigned int allocation_size = (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ; + /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */ + struct sk_buff *skb = alloc_skb((allocation_size > 256) ? 
allocation_size : 256, GFP_ATOMIC) ; /* TODO: refine the size */ + TRACEN(k_t_general,"skb_next=%p skb=%p allocation_size=%d copying_length=%d src_key=0x%08x slot=0x%08x conn_id=0x%08x pad_head=0x%02x tot_len=0x%04x",skb_next,skb,allocation_size,skb_next->len,src_key,slot,conn_id,pad_head,tot_len) ; + if( skb) + { + if(k_scattergather_diagnostic) stamp_skb(skb,tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ; + skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head); + memcpy(skb->data,skb_next->data,skb_next->len) ; + skb_put(skb,tot_len+sizeof(struct ethhdr)) ; + TRACEN(k_t_general,"skb->data=%p skb->len=0x%04x skb_next->data=%p skb_next->len=0x%04x", + skb->data, skb->len, skb_next->data, skb_next->len) ; + if( k_scattergather_diagnostic) display_skb_structure(skb) ; + } + else + { + TRACEN(k_t_error,"(E) No memory available for skbuff, torus will jam") ; + /* TODO: Could handle this by deferring until memory is available, or by sending a 'negative COP' and having the sender back off */ + } + dev_kfree_skb(skb_next) ; + skb_next=skb ; + eth=(struct ethhdr *)(skb_next->data) ; // Fix up, 'accept' setup uses this + + } + if( skb_next) + { + receive_skb_using_counter(dma_tcp,skb_next,counter_index,pad_head,slot,conn_id,x,y,z,tot_len,src_key) ; + } + else + { + TRACEN(k_t_error,"(E) No memory available for skbuff, torus will jam") ; + /* TODO: Could handle this by deferring until memory is available, or by sending a 'negative COP' and having the sender back off */ + } + } + else + { + TRACEN(k_t_general|k_t_scattergather,"Reception counter 0x%02x vacant",counter_index) ; + dma_tcp->recCntrInUse[counter_index] = 0 ; + dma_tcp->rcv_skbs[counter_index] = NULL ; + dma_tcp->qtyFreeRecCounters += 1 ; + DMA_CounterSetDisableById(&dma_tcp->recCounterGroup,counter_index) ; + } + +} + +static void handle_empty_recCounter(dma_tcp_t *dma_tcp, unsigned int counter_index) +{ + struct sk_buff 
*skb=dma_tcp->rcv_skbs[counter_index] ; + struct ethhdr *eth=(struct ethhdr *)(skb->data) ; + unsigned int x0 = ethhdr_src_x(eth) ; + unsigned int y0 = ethhdr_src_y(eth) ; + unsigned int z0 = ethhdr_src_z(eth) ; + handle_empty_recCounter_deliver(dma_tcp,counter_index) ; + handle_empty_recCounter_reload(dma_tcp,counter_index,x0,y0,z0) ; +} + +static void handle_stuck_recCounter(dma_tcp_t *dma_tcp, unsigned int counter_index) +{ + struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ; + struct ethhdr *eth=(struct ethhdr *)(skb->data) ; + unsigned int x0 = ethhdr_src_x(eth) ; + unsigned int y0 = ethhdr_src_y(eth) ; + unsigned int z0 = ethhdr_src_z(eth) ; + + instrument_flow(dma_tcp,k_receive_incomplete) ; + handle_empty_recCounter_flush(dma_tcp,counter_index) ; + handle_empty_recCounter_reload(dma_tcp,counter_index,x0,y0,z0) ; +} + +static void check_stuck_recCounters(dma_tcp_t *dma_tcp) +{ + unsigned int x ; + int j = jiffies ; + for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1) + { + if(dma_tcp->rcv_skbs[x] && (j-dma_tcp->rcv_timestamp[x]) >= 3*HZ ) + { + TRACEN(k_t_request,"(!!!) 
counter 0x%02x not completed after %d jiffies, freeing it",x,j-dma_tcp->rcv_timestamp[x]) ; + handle_stuck_recCounter(dma_tcp,x) ; + } + } +} + +void bgp_dma_tcp_empty_fifo_callback(void) +{ + dma_tcp_t *dma_tcp = &dma_tcp_state ; + unsigned int word0 , word1 ; + DMA_CounterGetAllHitZero(&dma_tcp->recCounterGroup, &word0, &word1) ; + if( word0 != 0 ) + { + DMA_CounterGroupClearHitZero(&dma_tcp->recCounterGroup, 0, word0) ; + TRACEN(k_t_general,"recCounterGroup word0=0x%08x",word0) ; + do { + unsigned int counter_index=32-fls(word0) ; /* Find the highest-order bit that is set */ + word0 &= (0x7fffffff >> counter_index) ; /* Clear it */ + handle_empty_recCounter(dma_tcp,counter_index) ; + } while ( word0 != 0) ; + } + if( word1 != 0) + { + DMA_CounterGroupClearHitZero(&dma_tcp->recCounterGroup, 1, word1) ; + TRACEN(k_t_general,"recCounterGroup word1=0x%08x",word1) ; + do { + unsigned int counter_index=32-fls(word1) ; /* Find the highest-order bit that is set */ + word1 &= (0x7fffffff >> counter_index) ; /* Clear it */ + handle_empty_recCounter(dma_tcp,32+counter_index) ; + } while ( word1 != 0) ; + } + /* 'clear orphaned reception counters' only works correctly if we are doing eager delivery */ + if( deliver_eagerly(dma_tcp)) + { + int checked_time = dma_tcp->rcv_checked_time ; + int j = jiffies ; + int elapsed = j - checked_time ; + if( elapsed > HZ) + { + dma_tcp->rcv_checked_time = j ; + check_stuck_recCounters(dma_tcp) ; + } + + } + + +} + +int bgp_dma_tcp_counter_copies[DMA_NUM_COUNTERS_PER_GROUP] ; + + +static inline int inject_into_dma_taxi(dma_tcp_t *dma_tcp, void * address, unsigned int length, unsigned int x, unsigned int y, unsigned int z, unsigned int my_injection_group, unsigned int desired_fifo, unsigned int proto, unsigned int SW_Arg ) + { + dma_addr_t dataAddr ; + DMA_InjDescriptor_t desc; + int ret1, ret2 ; + TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d 
desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo); +/* TRACEN(k_t_scattergather,"injecting, length=0x%04x my_injection_group=%d desired_fifo=%d",length,my_injection_group,desired_fifo) ; */ + dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE); + ret1 = DMA_TorusMemFifoDescriptor( &desc, + x, y, z, + k_ReceptionFifoGroup, /* recv fifo grp id */ + 0, /* hints */ + virtual_channel(dma_tcp,k_VC_anyway), /* go whichver way it wants */ + SW_Arg, /* softw arg */ + proto, /* function id */ + k_InjectionCounterGroup, /* inj cntr group id */ + k_injCounterId, /* inj counter id */ + dataAddr, /* send address */ + length /* msg len */ + ); + + + DMA_DescriptorSetPutOffset(&desc,-length) ; /* For 'memory FIFO packets', the put offset has no hardware use. Set it to indicate the message (fragment) length */ + ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo], + &desc ); + TRACEN(k_t_scattergather , "tgt=[%d %d %d] length=0x%04x injfifo[%d %02x]\n", + x,y,z,length, + my_injection_group,desired_fifo ) ; + TRACEN(k_t_general , "(<) ret1=%d ret2=%d",ret1, ret2); + return 1 ; + } + + + +/* The injectors are currently set up so that each 'software FIFO' pushes to a single (different) 'hardware FIFO' */ +/* This isn't needed for 'adaptive'; things could be rearranged for all 'software FIFOs' to have access to all 'hardware FIFOs' */ +enum { + k_my_vc_for_adaptive = k_VC_anyway +/* Diagnostically flip it to 'deterministic' ... 
*/ +/* k_my_vc_for_adaptive = k_VC_ordering */ +}; +static inline int inject_into_dma_adaptive(dma_tcp_t *dma_tcp, + void * address, + unsigned int length, + unsigned int x, unsigned int y, unsigned int z, + unsigned int my_injection_group, + unsigned int desired_fifo, + unsigned int proto, + unsigned int SW_Arg, + unsigned int conn_id ) + { + dma_addr_t dataAddr ; + DMA_InjDescriptor_t desc; + int ret1, ret2 __attribute((unused)); + unsigned int firstpacketlength = ( length > k_injection_packet_size) ? k_injection_packet_size : length ; + unsigned int midpacketcount = (length-(k_injection_packet_size+1)) / k_injection_packet_size ; + unsigned int packetcount = (length > k_injection_packet_size) ? (midpacketcount+2) : 1 ; + int PutOffset = (conn_id << 25) | (packetcount << 16) | ((-length) & 0xfff0) ; + TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo); + dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE); + if( length >= 10000) + { + TRACEN(k_t_request,"address=%p length=0x%08x dataAddr=0x%08llx",address,length,dataAddr) ; + } + +/* First injection is 'start of frame/fragment' */ + ret1 = DMA_TorusMemFifoDescriptor( &desc, + x, y, z, + k_ReceptionFifoGroup, /* recv fifo grp id */ + 0, /* hints */ + virtual_channel(dma_tcp,k_my_vc_for_adaptive), /* vc - adaptive */ + SW_Arg, /* softw arg */ + proto, /* function id */ + k_InjectionCounterGroup, /* inj cntr group id */ + k_injCounterId, /* inj counter id */ + dataAddr, /* send address */ + packetcount*firstpacketlength /* msg len */ + ); + + + DMA_DescriptorSetPutOffset(&desc,PutOffset) ; /* For 'memory FIFO packets', the put offset has no hardware use. 
Set it to pass required data to receive actor */ + ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo], + &desc ); + TRACEN(k_t_scattergather ,"tgt=[%d %d %d] length=0x%04x injfifo[%d %02x] conn_id=0x%02x\n", + x,y,z,length, + my_injection_group,desired_fifo,conn_id ) ; + TRACEN(k_t_general , "proto=%d firstpacketlength=%d ret1=%d ret2=%d",proto,firstpacketlength,ret1, ret2); + + return 1 ; + + } + +static inline void create_dma_descriptor_adaptive(dma_tcp_t *dma_tcp, + void * address, + unsigned int length, + unsigned int x, unsigned int y, unsigned int z, + unsigned int proto, + unsigned int SW_Arg, + unsigned int conn_id, + DMA_InjDescriptor_t *desc) + { + dma_addr_t dataAddr ; + int ret1 __attribute__((unused)); + unsigned int firstpacketlength = ( length > k_injection_packet_size) ? k_injection_packet_size : length ; + unsigned int midpacketcount = (length-(k_injection_packet_size+1)) / k_injection_packet_size ; + unsigned int packetcount = (length > k_injection_packet_size) ? 
(midpacketcount+2) : 1 ; + int PutOffset = (conn_id << 25) | (packetcount << 16) | ((-length) & 0xfff0) ; + TRACEN(k_t_general , "(>) address=%p length=0x%08x x=%d y=%d z=%d proto=%d SW_Arg=0x%08x desc=%p",address,length,x,y,z,proto,SW_Arg,desc); + dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE); + if( length >= 10000) + { + TRACEN(k_t_request,"address=%p length=0x%08x dataAddr=0x%08llx",address,length,dataAddr) ; + } + +/* First injection is 'start of frame/fragment' */ + ret1 = DMA_TorusMemFifoDescriptor( desc, + x, y, z, + k_ReceptionFifoGroup, /* recv fifo grp id */ + 0, /* hints */ + virtual_channel(dma_tcp,k_my_vc_for_adaptive), /* vc - adaptive */ + SW_Arg, /* softw arg */ + proto, /* function id */ + k_InjectionCounterGroup, /* inj cntr group id */ + k_injCounterId, /* inj counter id */ + dataAddr, /* send address */ + packetcount*firstpacketlength /* msg len */ + ); + + DMA_DescriptorSetPutOffset(desc,PutOffset) ; /* For 'memory FIFO packets', the put offset has no hardware use. 
Set it to pass required data to receive actor */ + TRACEN(k_t_general , "(<) firstpacketlength=%d ret1=%d",firstpacketlength,ret1); + + } + +static inline int inject_dma_descriptor_adaptive(dma_tcp_t *dma_tcp, + unsigned int my_injection_group, + unsigned int desired_fifo, + DMA_InjDescriptor_t *desc) + { + int ret __attribute__((unused)); + TRACEN(k_t_general|k_t_sgdiag , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p",my_injection_group,desired_fifo,desc); + TRACEN(k_t_sgdiag,"injecting 0x%04x bytes",desc->msg_length) ; + ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo], + desc ); + + TRACEN(k_t_general , "(<) ret=%d",ret); + return 1 ; + + } + +static inline int inject_dma_descriptors_adaptive(dma_tcp_t *dma_tcp, + unsigned int my_injection_group, + unsigned int desired_fifo, + DMA_InjDescriptor_t **desc, + unsigned int count ) + { + int ret __attribute__((unused)); + int r2 __attribute__((unused)); + unsigned int fifo_index = my_injection_group*k_injecting_directions+desired_fifo ; + TRACEN(k_t_general|k_t_sgdiag , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p count=%d fifo_id=0x%02x", + my_injection_group,desired_fifo,desc,count, dma_tcp->injFifoFramesIds[fifo_index]); + if( 0 == desc[0]->msg_length) + { + TRACEN(k_t_general,"(I) msg_length[0] zero, injection skipped") ; + desc += 1 ; + count -= 1 ; + } + ret = DMA_InjFifoInjectDescriptorsById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[fifo_index], + count, + desc ); + r2=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId, 0xffffffff ); + if( ret != count) + { + TRACEN(k_t_error,"(!!!) 
count=%d ret=%d",count,ret) ; + } + + TRACEN(k_t_general , "(<) count=%d fifo_id=0x%02x", + count,dma_tcp->injFifoFramesIds[fifo_index]); + + return count ; + } + +/* Don't actually need this; the length is precise anyway, we just may waste some cells in the last packet */ +#if 0 +static inline int inject_dma_descriptor_adaptive_precise_length(dma_tcp_t *dma_tcp, + unsigned int my_injection_group, + unsigned int desired_fifo, + DMA_InjDescriptor_t *desc) + { + unsigned int size=desc->msg_length ; + unsigned int full_frame_count=size / k_torus_link_payload_size ; + unsigned int full_frame_size = full_frame_count * k_torus_link_payload_size ; + unsigned int trailing_frame_size = size - full_frame_size ; + unsigned int rc=0 ; + if(0 == trailing_frame_size || 0 == full_frame_count) // These cases were already 'precise' + { + int ret __attribute__((unused)); + TRACEN(k_t_general , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p",my_injection_group,desired_fifo,desc); + ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo], + desc ); + TRACEN(k_t_general , "(<) ret=%d",ret); + return 1 ; + } + else + { + /* Need to split into 2 injections in order not to transmit extra cells */ + int ret __attribute__((unused)); + desc->msg_length=full_frame_size ; + ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo], + desc ); + desc->msg_length=trailing_frame_size ; + desc->base_offset += full_frame_size ; + desc->hwHdr.Chunks = DMA_PacketChunks(trailing_frame_size) - 1 ; + ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo], + desc ); + return 2 ; + + + + } + + } +#endif + + +static void analyse_skb(struct sk_buff *skb) __attribute__ ((unused)) ; 
+static void analyse_skb(struct sk_buff *skb) + { + struct sock *sk=skb->sk ; + struct inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + unsigned int daddr=inet->daddr ; + unsigned int flags = TCP_SKB_CB(skb)->flags ; + if(icsk->icsk_retransmits > 0 ) + { + TRACEN(k_t_congestion,"(I) sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d resending (BGP)", + sk, skb, skb->data, skb->len, flags, + daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff, + icsk->icsk_retransmits, icsk->icsk_rto ) ; + } + } + +static inline int selfsend(const torusLocation_t * t, unsigned int x, unsigned int y, unsigned int z) +{ + unsigned int tx=t->coordinate[0] ; + unsigned int ty=t->coordinate[1] ; + unsigned int tz=t->coordinate[2] ; + return (tx == x && ty == y && tz == z) ; +} + +static inline int offfabric(const torusLocation_t * t, unsigned int x, unsigned int y, unsigned int z) +{ + unsigned int tx=t->coordinate[0] ; + unsigned int ty=t->coordinate[1] ; + unsigned int tz=t->coordinate[2] ; + return (x >= tx || y >= ty || z >= tz) ; +} +static inline void clear_dir_in_use(unsigned char * direction_is_in_use) +{ + int x ; + for(x=0;x<=k_injecting_directions;x+=1) + { + direction_is_in_use[x] = 0 ; + } +} + +static inline void record_dir_in_use(dma_tcp_t * dma_tcp,unsigned char * direction_is_in_use) +{ + int x ; + for(x=0;x<k_injecting_directions;x+=1) + { + dma_tcp->tx_in_use_count[x] += direction_is_in_use[x] ; + } + dma_tcp->tx_in_use_count[k_injecting_directions] += 1 ; +} + +/* Routine to free all the skbuffs that control data which has left the node */ +static void dma_tcp_frames_transmission_free_skb(unsigned long parm) + { + dma_tcp_t *dma_tcp = &dma_tcp_state ; + unsigned int core ; + unsigned int total_injection_used = 0 ; + unsigned char direction_is_in_use[k_skb_controlling_directions] ; + clear_dir_in_use(direction_is_in_use) ; +#if defined(TRACK_LIFETIME_IN_FIFO) + unsigned long long 
now=get_powerpc_tb() ; +#endif + for( core=0 ; core<k_injecting_cores; core += 1) + { + unsigned int desired_fifo ; + for(desired_fifo=0; desired_fifo<k_skb_controlling_directions; desired_fifo += 1 ) + { + spinlock_t * injectionLock = &dma_tcp->dirInjectionLock[core*k_injecting_directions+desired_fifo] ; + idma_direction_t * buffer = dma_tcp->idma.idma_core[core].idma_direction+desired_fifo ; + unsigned int fifo_initial_head = dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].fifo_initial_head ; + unsigned int bhx = buffer->buffer_head_index ; + unsigned int btx = buffer->buffer_tail_index ; /* This indexes the oldest skbuff that might still be pending send by the DMA unit */ + unsigned int fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ; + unsigned int fifo_current_tail = + (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ; + unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ; + unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ; + unsigned int current_injection_used=packet_mod(tailx-headx) ; + int skql2 = packet_mod(bhx-btx) ; + if( 0 != current_injection_used ) direction_is_in_use[desired_fifo] = 1 ; + if( skql2 != current_injection_used) + { + skb_group_t skb_group ; + + skb_group_init(&skb_group) ; + if( spin_trylock(injectionLock)) + { + unsigned int bhx = buffer->buffer_head_index ; + unsigned int btx = buffer->buffer_tail_index ; /* This indexes the oldest skbuff that might still be pending send by the DMA unit */ + unsigned int fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ; + unsigned int fifo_current_tail = + (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, 
dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ; + unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ; + unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ; + unsigned int current_injection_used=packet_mod(tailx-headx) ; + int skql2 = packet_mod(bhx-btx) ; + int count_needing_freeing = skql2-current_injection_used ; + int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ; + TRACEN(k_t_detail,"current_injection_used=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,skql2,count_needing_freeing,count_to_free); + skb_group_queue(&skb_group,dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free +#if defined(TRACK_LIFETIME_IN_FIFO) + , core, desired_fifo, now +#endif + ) ; + btx = packet_mod(btx+count_to_free) ; + buffer->buffer_tail_index = btx ; + TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index); + total_injection_used += current_injection_used ; + + spin_unlock(injectionLock) ; + skb_group_free(&skb_group) ; + } + else + { + total_injection_used += current_injection_used ; + } + } + } + } + TRACEN(k_t_detail,"total_injection_used=%d",total_injection_used) ; + record_dir_in_use(dma_tcp,direction_is_in_use) ; + if( total_injection_used > 0 ) + { + mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ; + } + } + + +static void display_skb_structure(struct sk_buff *skb) +{ + int f ; + unsigned int headlen=skb_headlen(skb) ; + TRACEN(k_t_request, "sk_buff(head=%p data=%p tail=%p end=%p len=0x%08x data_len=0x%08x nr_frags=%d", + skb->head, skb->data, skb->tail, skb->end, skb->len, skb->data_len, skb_shinfo(skb)->nr_frags) ; + dumpmem(skb->data,(headlen > 256) ? 
256 : headlen,"skb head") ; + for(f=0; f<skb_shinfo(skb)->nr_frags; f+=1) + { + struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f]; + unsigned int page_offset=frag->page_offset ; + unsigned int size = frag->size ; + TRACEN(k_t_request, " frags[%d](page_offset=0x%08x size=0x%08x)", + f,page_offset,size) ; + } +} + +static inline unsigned int imin2(unsigned int a, unsigned int b) +{ + return (a>b) ? b : a ; +} +#if defined(USE_SKB_TO_SKB) +static void bgp_dma_tcp_s_and_f_frames_dma( + dma_tcp_t *dma_tcp, + struct sk_buff *skb + ) +{ + frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ; + struct ethhdr *eth = (struct ethhdr *)(skb->data) ; + unsigned int x = eth->h_dest[3] ; + unsigned int y = eth->h_dest[4] ; + unsigned int z = eth->h_dest[5] ; + unsigned int payload_address = (unsigned int)(skb->data) ; + unsigned int aligned_payload_address = payload_address & (~ 0x0f) ; + unsigned int pad_head = payload_address & 0x0f ; + unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ; /* Everything to a given node will go on the same stream, no point coding injection group in */ + unsigned int headlen = skb_headlen(skb) ; + TRACEN(k_t_general ,"(>)skb=%p (%02x,%02x,%02x) data=%p length=%d data_len=%d headlen=%d", skb,x,y,z,skb->data, skb->len, skb->data_len,headlen); + dumpframe(skb->data, skb_headlen(skb), "skbuff to send") ; + + TRACEN(k_t_general, "(=)(I) testdma: Sending to (%d,%d,%d)", + x, y, z ); + + /* Make sure we're not trying to send off the partition or to self */ + if( k_verify_target) + { + if( offfabric(&(dma_tcp->extent),x,y,z)) + { + TRACEN(k_t_error, "(W) Target (%d,%d,%d) not in range",x,y,z) ; + WARN_ON(1) ; + dev_kfree_skb(skb) ; + return ; + } + if( selfsend(&(dma_tcp->location),x,y,z)) + { + TRACEN(k_t_error, "(W) Self-send not supported by hardware (%d %d %d)",x,y,z) ; + WARN_ON(1) ; + dev_kfree_skb(skb) ; + return ; + } + } + + TRACEN(k_t_protocol,"(=)sending packet to (%02x,%02x,%02x) length=%d", + x,y,z,skb->len) ; + + /* 
copy descriptor into the inj fifo */ + { + unsigned int dest_key = x*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2] + +y*dma_tcp->extent.coordinate[2] + +z ; + unsigned int conn_id = take_tx_conn_id(&dma_tcp->tx_mux,dest_key) ; + atomic_inc(&dma_tcp->framesProposed) ; + TRACEN(k_t_general,"Saving skb=%p for dest_key=0x%08x conn_id=0x%08x",skb,dest_key,conn_id) ; + set_tx_skb(&dma_tcp->tx_mux,dest_key,conn_id,skb) ; + ficb->free_when_done = 0 ; + +#if defined(AUDIT_HEADLEN) + { + struct iphdr *iph = (struct iphdr *)(eth+1) ; + ficb->tot_len = iph->tot_len ; + } +#endif + { + /* If we have a 'scatter-gather' skb, try to put the head into the 'propose' packet */ + unsigned int nr_frags = skb_shinfo(skb)->nr_frags ; + unsigned int propose_length = (nr_frags == 0 ) ? 48 : imin2(pad_head+headlen,k_torus_link_payload_size) ; + eth->h_source[0] = propose_length ; // Use a byte on-the-side to say how much data was actually sent + TRACEN(k_t_general,"nr_frags=%d propose_length=%d",nr_frags,propose_length) ; + create_dma_descriptor_propose_accept(dma_tcp, + (void *)aligned_payload_address, + propose_length, + x,y, z, + dma_tcp->proto_transfer_propose, + src_key, + conn_id, + 0, + &ficb->desc, + propose_length + ) ; + } + } + instrument_flow(dma_tcp,k_send_propose_rpc) ; + bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp, skb, 0, k_cattle_class) ; +} +#endif + +static int inject_scattergather( + dma_tcp_t *dma_tcp, + struct sk_buff *skb, + unsigned int my_injection_group, + unsigned int desired_fifo +) +{ + frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + struct ethhdr *eth = (struct ethhdr *)(skb->data) ; + unsigned int aligned_payload_length = ficb->desc.msg_length ; + unsigned int x=ficb->desc.hwHdr.X ; + unsigned int y=ficb->desc.hwHdr.Y ; + unsigned int z=ficb->desc.hwHdr.Z ; + unsigned int f ; + unsigned int dest_offset=k_abbreviate_headlen ? 
(aligned_payload_length+eth->h_source[0]): aligned_payload_length ; + unsigned int base_offset=ficb->desc.base_offset ; + unsigned int rctr=ficb->desc.hwHdr.rDMA_Counter % DMA_NUM_COUNTERS_PER_GROUP ; + struct iphdr *iph = (struct iphdr *)(eth+1) ; + unsigned int daddr=iph->daddr ; + + DMA_InjDescriptor_t descVector[MAX_SKB_FRAGS] ; + DMA_InjDescriptor_t * descPtr[1+MAX_SKB_FRAGS] ; + unsigned int total_inj_length = ficb->desc.msg_length ; + TRACEN(k_t_scattergather|k_t_sgdiag,"injecting, base_offset=0x%04x length=0x%04x my_injection_group=%d desired_fifo=%d dest_offset=0x%04x", + base_offset,ficb->desc.msg_length,my_injection_group,desired_fifo, dest_offset) ; + + /* Prepare the initial not-fragment part */ + descPtr[0] = &ficb->desc ; + /* scatter-gather fragments to be pushed out here */ + for(f=0;f<nr_frags;f+=1) + { + struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f]; + struct page *page = frag->page ; + unsigned int page_offset=frag->page_offset ; + unsigned int size = frag->size ; + dma_addr_t buffAddr = dma_map_page(NULL, page, page_offset, size, DMA_TO_DEVICE); + TRACEN(k_t_scattergather|k_t_sgdiag,"f=%d page=%p page_offset=0x%04x size=0x%04x buffAddr=0x%08llx dest_offset=0x%04x", + f,page,page_offset,size,buffAddr,dest_offset) ; + total_inj_length += size ; + if( 0 != size) + { + create_dma_descriptor_direct_put_offset(dma_tcp,x,y,z,k_injCounterId,rctr,buffAddr,size,descVector+f,dest_offset) ; + } + else + { + TRACEN(k_t_request,"(I) frag length zero") ; + DMA_ZeroOutDescriptor(descVector+f) ; + instrument_flow(dma_tcp,k_fraglength_zero) ; + } + descPtr[1+f]=descVector+f ; + dest_offset += size ; + + } + TRACEN(k_t_sgdiag,"Injecting tgt=[%d,%d,%d] length=0x%04x ctr=0x%02x",x,y,z,total_inj_length,rctr) ; + + + TRACEN(k_t_scattergather ,"tgt=[%d %d %d] daddr=%d.%d.%d.%d tot_len=0x%04x, length=0x%04x headlen=0x%04x data_len=0x%04x dest_offset=0x%04x nr_frags=%d fragsizes[0x%04x 0x%04x 0x%04x] counter=0x%02x injfifo[%d %02x]\n", + x,y,z, + daddr>>24, 
(daddr >> 16) & 0xff,(daddr >> 8) & 0xff, daddr & 0xff,iph->tot_len, + skb->len,skb_headlen(skb), skb->data_len, dest_offset, + nr_frags,skb_shinfo(skb)->frags[0].size,skb_shinfo(skb)->frags[1].size,skb_shinfo(skb)->frags[2].size,rctr,my_injection_group,desired_fifo ) ; + if( skb_headlen(skb) < sizeof(struct ethhdr)+sizeof(struct iphdr)) + { + TRACEN(k_t_request,"(!!!) length=0x%04x data_len=0x%04x nr_frags=%d fragsizes[0x%04x 0x%04x 0x%04x]",skb->len, skb->data_len, nr_frags,skb_shinfo(skb)->frags[0].size,skb_shinfo(skb)->frags[1].size,skb_shinfo(skb)->frags[2].size) ; + display_skb_structure(skb) ; + } + return inject_dma_descriptors_adaptive(dma_tcp,my_injection_group,desired_fifo,descPtr,1+nr_frags) ; + +} +/* Send-and-free a frame with an already-prepared injection descriptor (which might be DMA-put or FIFO-put) */ +static int bgp_dma_tcp_s_and_f_frames_prepared( + dma_tcp_t *dma_tcp, + struct sk_buff *skb, + unsigned int queue_at_head, + unsigned int transport_class + ) + { + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + unsigned int is_scattergather = (nr_frags > 0 ) ; + unsigned int payload_length = (skb -> len) - (skb->data_len) ; + unsigned int payload_address = (unsigned int)(skb->data) ; + unsigned int aligned_payload_address = payload_address & (~ 0x0f) ; + unsigned int pad_head = payload_address & 0x0f ; + unsigned int aligned_payload_length = payload_length + pad_head ; + #if 1 + unsigned int use_taxi = 0 ; + #else + unsigned int use_taxi = (aligned_payload_length<=k_injection_packet_size) && (0 == nr_frags); + #endif + unsigned long flags ; + unsigned int current_injection_used=0xffffffff ; + + int ret = 0; + int ring_ok ; + + int my_injection_group ; + skb_group_t skb_group ; + frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ; + unsigned int x=ficb->desc.hwHdr.X ; + unsigned int y=ficb->desc.hwHdr.Y ; + unsigned int z=ficb->desc.hwHdr.Z ; + unsigned int header_dma_length=ficb->desc.msg_length ; // If this is zero, then we can free 
the skb as soon as its 'frags' are in software injection fifo + TRACEN(k_t_general ,"(>)skb=%p (%02x,%02x,%02x) data=%p length=%d data_len=%d nr_frags=%d", skb,x,y,z,skb->data, skb->len, skb->data_len, nr_frags); + if(is_scattergather ) instrument_flow(dma_tcp,k_scattergather) ; + + skb_group_init(&skb_group) ; + + TRACEN(k_t_general, "(=)(I) testdma: Sending to (%d,%d,%d)", + x, y, z ); + +/* Make sure we're not trying to send off the partition or to self */ + if( k_verify_target) + { + if( offfabric(&(dma_tcp->extent),x,y,z)) + { + TRACEN(k_t_error, "(W) Target (%d,%d,%d) not in range",x,y,z) ; + WARN_ON(1) ; + dev_kfree_skb(skb) ; + return -EINVAL; + } + if( selfsend(&(dma_tcp->location),x,y,z)) + { + TRACEN(k_t_error, "(W) Self-send not supported by hardware (%d %d %d)",x,y,z) ; + WARN_ON(1) ; + dev_kfree_skb(skb) ; + return -EINVAL; + } + } + TRACEN(k_t_protocol,"(=)sending packet to (%02x,%02x,%02x) length=%d", + x,y,z,skb->len) ; + + /* copy descriptor into the inj fifo */ + { + unsigned int desired_fifo=((transport_class != k_cattle_class) && (aligned_payload_length<=k_injection_packet_size) && (0 == nr_frags)) ? 
(k_skb_controlling_directions-1) : select_transmission_fifo(dma_tcp,x,y,z) ; + my_injection_group=injection_group_hash(dma_tcp,x,y,z) ; + spin_lock_irqsave(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ; + { + unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ; /* Everything to a given node will go on the same stream, no point coding injection group in */ + /* Work out which buffer we are going to use for the packet stream */ + idma_direction_t * buffer = dma_tcp->idma.idma_core[my_injection_group].idma_direction+desired_fifo ; + /* Set up the payload */ + unsigned int bhx = buffer->buffer_head_index ; + unsigned int lastx = packet_mod(bhx) ; + unsigned int fifo_initial_head = dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].fifo_initial_head ; + unsigned int fifo_current_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ; + unsigned int fifo_current_tail = + (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ; + unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ; + unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ; + unsigned int injection_count ; +#if defined(TRACK_LIFETIME_IN_FIFO) + unsigned long long now=get_powerpc_tb() ; + *(unsigned long long*)(skb->cb) = now ; +#endif + current_injection_used=packet_mod(tailx-headx) ; + /* If the network is backing up, we may have to skip out here, */ + /* so that we don't overwrite unsent data. 
*/ + TRACEN(k_t_general ,"Runway desired_fifo=%d headx=%d tailx=%d bhx=%d current_injection_used=%d", + desired_fifo,headx,tailx,bhx,current_injection_used) ; + if( current_injection_used > buffer->injection_high_watermark ) + { + buffer->injection_high_watermark=current_injection_used ; /* Congestion statistic */ + } + { + /* Need to have room to inject the in-skbuff data plus all attached 'fragments', each of which may be sent in 3 injections */ + if( current_injection_used+3*(MAX_SKB_FRAGS+1) < k_injection_packet_count-1) + { + ring_ok = 1 ; + TRACEN(k_t_general,"Runway slot granted") ; + } + else + { + ring_ok = 0 ; + TRACEN(k_t_congestion,"Runway slot denied tailx=%08x headx=%08x",tailx,headx) ; + } + } + TRACEN(k_t_general ,"Injection my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x nr_frags=%d", + my_injection_group, desired_fifo, bhx, headx,tailx,nr_frags + ) ; + if ( ring_ok ) + { + /* We are going to send something. */ + + /* Bump the injection counter. Actually only needs doing once per 4GB or so */ + ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId, 0xffffffff ); + + /* and inject it */ + if(use_taxi) + { + injection_count = inject_into_dma_taxi(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,my_injection_group,desired_fifo, + dma_tcp->proto_issue_frames_single,src_key) ; + } + else + { + if( is_scattergather && 0 != ficb->free_when_done) + { + injection_count = inject_scattergather( + dma_tcp,skb,my_injection_group,desired_fifo + ) ; + } + else + { + /* Prop, or accept, or unfragmented skbuff */ + injection_count = inject_dma_descriptor_adaptive(dma_tcp,my_injection_group,desired_fifo, + &ficb->desc + ) ; + } + + } + { + unsigned int nhx=packet_mod(bhx+injection_count) ; + /* Remember where we will be pushing the next injection in */ + TRACEN(k_t_detail,"Next injection will be at nhx=0x%08x",nhx) ; + buffer->buffer_head_index = nhx ; + /* Record the skbuff so it can be freed 
later, after data is DMA'd out */ + if( ficb->free_when_done && header_dma_length > 0 ) + { + TRACEN(k_t_detail,"Saving skb=%p at [%p] for freeing later",skb,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array+nhx) ; + dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array[nhx] = skb ; + } + } + /* hang on to the skbs until they are sent ... */ + if( current_injection_used != 0xffffffff) + { + unsigned int btx = buffer->buffer_tail_index ; /* This indexes the oldest skbuff that might still be pending send by the DMA unit */ + int skql2 = packet_mod(bhx-btx) ; + int count_needing_freeing = skql2-current_injection_used ; + int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ; + TRACEN(k_t_detail ,"current_injection_used=%d btx=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,btx,skql2,count_needing_freeing,count_to_free); + skb_group_queue(&skb_group,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free +#if defined(TRACK_LIFETIME_IN_FIFO) + , my_injection_group, desired_fifo, now +#endif + ) ; + btx = packet_mod(btx+count_to_free) ; + buffer->buffer_tail_index = btx ; + TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index); + } + } + else + { + TRACEN(k_t_congestion,"Would overrun my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x lastx=%08x", + my_injection_group, desired_fifo, bhx, headx,tailx, lastx + ) ; + } + } + spin_unlock_irqrestore(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ; + skb_group_free(&skb_group) ; + if( k_async_free ) mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ; + if( 0 == ring_ok ) + { + TRACEN(k_t_congestion,"(=)Queuing skb=%p desired_fifo=%d (%u %u %u)", skb,desired_fifo,x,y,z) ; + if( 
queue_at_head) + { + skb_queue_head(dma_tcp->inj_queue+desired_fifo, skb) ; + } + else + { + skb_queue_tail(dma_tcp->inj_queue+desired_fifo, skb) ; + } + } + else + { + if( 0 == header_dma_length) + { + TRACEN(k_t_general,"Freeing skb=%p, its header has left the node",skb) ; + dev_kfree_skb(skb) ; + } + } + + + + TRACEN(k_t_general ,"(<) ring_ok=%d desired_fifo=%d",ring_ok,desired_fifo); + + return ring_ok ? desired_fifo : -1 ; + } + + } + +/* ... return 'direction' if we sent the packet, '-1' if we queued it */ +static int bgp_dma_tcp_s_and_f_frames( + dma_tcp_t *dma_tcp, + struct sk_buff *skb, + unsigned int queue_at_head + ) +{ +#if defined(USE_ADAPTIVE_ROUTING) + struct ethhdr *eth = (struct ethhdr *)(skb->data) ; + unsigned int x = eth->h_dest[3] ; + unsigned int y = eth->h_dest[4] ; + unsigned int z = eth->h_dest[5] ; + unsigned int payload_length = (skb -> len) - (skb->data_len) ; + unsigned int payload_address = (unsigned int)(skb->data) ; + unsigned int aligned_payload_address = payload_address & (~ 0x0f) ; + unsigned int pad_head = payload_address & 0x0f ; + unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ; /* Everything to a given node will go on the same stream, no point coding injection group in */ + unsigned int aligned_payload_length = payload_length + pad_head ; + frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ; + + unsigned int dest_key = x*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2] + +y*dma_tcp->extent.coordinate[2] + +z ; + unsigned int conn_id = take_tx_conn_id(&dma_tcp->tx_mux,dest_key) ; + instrument_flow(dma_tcp,k_send_eager) ; + ficb->free_when_done = 1 ; + + if(TRACING(k_t_sgdiag)) + { + diag_skb_structure(skb) ; + } + create_dma_descriptor_adaptive(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z, + dma_tcp->proto_issue_frames_adaptive,src_key,conn_id, &ficb->desc + ) ; + +#endif + if( k_verify_ctlen) + { + unsigned int ctlen = counted_length(skb) ; + struct ethhdr *eth = (struct 
ethhdr *)(skb->data) ; + struct iphdr *iph = (struct iphdr *)(eth+1) ; + if( ctlen != iph->tot_len + sizeof(struct ethhdr)) + { + TRACEN(k_t_error,"(E) Counted length mismatch, skb=%p, counted_length=0x%04x, tot_len=0x%04x",skb,ctlen,iph->tot_len ) ; + display_skb_structure(skb) ; + display_iphdr(iph) ; + dev_kfree_skb(skb) ; // It would cause trouble later, to try and send it. So drop it. + instrument_flow(dma_tcp,k_counted_length_mismatch) ; + return 0 ; // Not really 'direction 0', but this will not cause the caller a problem. + } + } + + return bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,skb,queue_at_head, 0) ; +} + +/* Try to clear a pending skbuff queue into the mem-fifo */ +/* return 0 if queue cleared */ +/* -1 if the queue cannot be cleared because the FIFO gets full */ +static int bgp_dma_tcp_try_to_clear_queue(dma_tcp_t *dma_tcp, unsigned int direction) noinline ; +static int bgp_dma_tcp_try_to_clear_queue(dma_tcp_t *dma_tcp, unsigned int direction) + { + struct sk_buff_head *skq = dma_tcp->inj_queue+direction ; + TRACEN(k_t_general,"(>) direction=%u",direction ); + if( !
skb_queue_empty(skq)) + { + /* We sent something, and there is a pending list which we might be able to send as well */ + for(;;) + { + struct sk_buff * askb = skb_dequeue(skq) ; + if( askb) + { + TRACEN(k_t_congestion,"(=)Dequeuing dir=%d askb=%p length=%u", direction, askb,askb->len) ; + { + int arc= bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,askb,1,k_cattle_class) ; + if( -1 == arc) + { + TRACEN(k_t_congestion,"still-congested dir=%d",direction ); + TRACEN(k_t_general,"(<) still-congested" ); + instrument_flow(dma_tcp,k_queue_filled_propose_fifo) ; + return -1 ; /* Queue not cleared */ + } + } + } + else + { + TRACEN(k_t_congestion,"(=)Dequeuing askb=NULL") ; + break ; + } + + } + + } + + TRACEN(k_t_general,"(<) clear" ); + return 0 ; /* Queue cleared */ + } + +static void dma_tcp_frames_runway_check(unsigned long parm) + { + dma_tcp_t *dma_tcp = &dma_tcp_state ; + int direction ; + int anything_queued = 0 ; + TRACEN(k_t_congestion,"(>)"); + for(direction=0;direction<k_injecting_directions;direction+=1) + { + anything_queued += bgp_dma_tcp_try_to_clear_queue(dma_tcp,direction) ; + } + if( anything_queued) + { + mod_timer(&dma_tcp->runway_check_timer,jiffies+1) ; /* Redrive on the next timer tick */ + } + TRACEN(k_t_congestion,"(<) anything_queued=%d",anything_queued); + } + +/* Take an skbuff bound for (x,y,z), and either put it in the software FIFO or queue it for when congestion abates */ +int bgp_dma_tcp_send_and_free_frames( struct sk_buff *skb ) +{ + TRACEN(k_t_general,"(>)skb=%p data=%p length=%d", skb,skb->data, skb->len) ; + { + dma_tcp_t *dma_tcp = &dma_tcp_state ; + dma_tcp->tx_by_core[smp_processor_id() & 3] += 1 ; /* Stats on which core(s) are busy */ +#if defined(CONFIG_BGP_STATISTICS) + { + struct ethhdr *eth = (struct ethhdr *) (skb->data) ; + struct iphdr *iph=(struct iphdr *) (eth+1) ; + dma_tcp->bytes_sent += iph->tot_len ; + } +#endif + + if( 0 == skb_headlen(skb)) + { + TRACEN(k_t_request,"(I) head length zero") ; + } + +#if 
defined(USE_SKB_TO_SKB) + if( skb->len > dma_tcp->eager_limit || 0 != skb_shinfo(skb)->nr_frags ) + { + bgp_dma_tcp_s_and_f_frames_dma(dma_tcp,skb) ; + } + else +#endif + { + int rc = bgp_dma_tcp_s_and_f_frames(dma_tcp,skb, + /* x,y,z, */ + 0) ; + if( rc == -1) + { + mod_timer(&dma_tcp->runway_check_timer,jiffies+1) ; /* Redrive on the next timer tick */ + } + } + } + TRACEN(k_t_general,"(<)"); + return 0 ; +} + +#if defined(ENABLE_LATENCY_TRACKING) + +static unsigned int isqrt(unsigned int x) + { + unsigned int rc=0 ; + unsigned int i ; + for( i=0;i<16;i+=1) + { + unsigned int c= rc | (0x8000 >> i) ; + if( c*c <= x ) rc = c ; + } + return rc ; + } +#endif + +#if defined(TRACK_SEQUENCE) +static void dma_tcp_frames_show_sequence(dma_tcp_t *dma_tcp) + { + unsigned int x ; + unsigned int y ; + unsigned int z ; + unsigned int core ; + unsigned int xsize = dma_tcp->extent.coordinate[0] ; + unsigned int ysize = dma_tcp->extent.coordinate[1] ; + unsigned int zsize = dma_tcp->extent.coordinate[2] ; + unsigned int myx = dma_tcp->location.coordinate[0] ; + unsigned int myy = dma_tcp->location.coordinate[1] ; + unsigned int myz = dma_tcp->location.coordinate[2] ; + for(x=0;x<xsize; x+=1 ) + { + for( y = 0; y<ysize; y+=1) + { + for( z = 0 ; z<zsize; z+=1 ) + { + unsigned int slot_base = x*(ysize*zsize) + y*zsize + z ; + for( core=0; core<k_injecting_cores; core+=1) + { + unsigned int slot = (slot_base << 2) | core ; + unsigned int txcount = send_sequences[slot] ; + unsigned int rxcount = receive_sequences[slot] ; + if( txcount || rxcount) + { + TRACEN(k_t_request,"( %d %d %d ) show_sequence( %d %d %d %d )=( %d %d )", myx, myy, myz, x,y,z,core, txcount,rxcount) ; + } + } + } + } + } + } +#endif + +#if defined(ENABLE_PROGRESS_TRACKING) +static void dma_tcp_frames_show_progress(dma_tcp_t *dma_tcp) + { + unsigned int x ; + unsigned int y ; + unsigned int z ; + unsigned int core ; + unsigned int xsize = dma_tcp->extent.coordinate[0] ; + unsigned int ysize = 
dma_tcp->extent.coordinate[1] ; + unsigned int zsize = dma_tcp->extent.coordinate[2] ; + unsigned int myx = dma_tcp->location.coordinate[0] ; + unsigned int myy = dma_tcp->location.coordinate[1] ; + unsigned int myz = dma_tcp->location.coordinate[2] ; + unsigned long long now=get_powerpc_tb() ; + TRACEN(k_t_entryexit,">") ; + for(x=0;x<xsize; x+=1 ) + { + for( y = 0; y<ysize; y+=1) + { + for( z = 0 ; z<zsize; z+=1 ) + { + unsigned int slot_base = x*(ysize*zsize) + y*zsize + z ; + for( core=0; core<k_injecting_cores; core+=1) + { + unsigned int slot = (slot_base << 2) | core ; + if( get_rcv_skb(&dma_tcp->rcvdemux,slot)) + { + unsigned long long timestamp=get_timestamp(&dma_tcp->rcvdemux,slot) ; + unsigned long long age=now-timestamp ; + TRACEN(k_t_request,"( %d %d %d ) age( %d %d %d %d )= 0x%08x%08x !!!", myx, myy, myz, x,y,z,core,(unsigned int)(age>>32),(unsigned int)age) ; + } + } + } + } + } + TRACEN(k_t_entryexit,"<") ; + } +#endif + +void __init +balancer_init(bgp_dma_balancer *balancer) +{ + int x; + for(x=0;x<k_pending_rcv_skb_classes;x+=1) + { + TRACEN(k_t_general,"balancer init[%d]",x) ; + skb_queue_head_init(&balancer->b[x].pending_rcv_skbs) ; + balancer->b[x].outstanding_counters=0 ; + } +} + +/* + * We set up 32 software injection FIFOs. We arrange them in 4 groups of 8; the group number is chosen as a function of the + * destination node, For the group of 8, we use 6 FIFOs to control 'bulk data' nominally one for each outbound link (though + * adaptive routing may take a packet out of a different link when the time comes); 1 FIFO to control single-packet frames + * which are sent high-priority because they may be 'ack' frames which will enable more data to flow from a far-end node; and + * 1 FIFO to control 'accept' packets which are sent high-priority because a scarce local resource (a reception counter) has been + * allocated to the transfer and we would like it underway as soon as possible. 
+ */ + +void __init +dma_tcp_frames_init(dma_tcp_t *dma_tcp) + { + TRACEN(k_t_general,"sizeof(frame_injection_cb)=%d sizeof(DMA_PacketHeader_t)=%d sizeof(DMA_InjDescriptor_t)=%d",sizeof(frame_injection_cb),sizeof(DMA_PacketHeader_t),sizeof(DMA_InjDescriptor_t)) ; + + if( k_async_free ) setup_timer(&dma_tcp->transmission_free_skb_timer,dma_tcp_frames_transmission_free_skb,0) ; + setup_timer(&dma_tcp->runway_check_timer,dma_tcp_frames_runway_check,0) ; + dma_tcp->rcv_checked_time = jiffies ; + dma_tcp->packets_received_count = 0 ; + allocate_idma(&dma_tcp->idma) ; /* Buffering for packets-style injection DMA */ + allocate_rcv(&dma_tcp->rcvdemux,dma_tcp->node_count) ; /* Demultiplexing for packets-style reception */ +#if defined(USE_ADAPTIVE_ROUTING) + allocate_tx(&dma_tcp->tx_mux,dma_tcp->node_count) ; /* Multiplexing for adaptive-routing transmit */ +#endif +#if defined(TRACK_SEQUENCE) + track_sequence_init(dma_tcp->node_count) ; +#endif + init_demux_table(dma_tcp, dma_tcp->node_count) ; + /* Allocate injection FIFOs for 'packets' style access */ + { + int core ; + int direction ; + for( core=0; core< k_injecting_cores; core += 1 ) + { + for( direction=0; direction< k_injecting_directions; direction += 1 ) + { + dma_tcp->injFifoFramesPri[ core*k_injecting_directions+direction ] = 0 ; + dma_tcp->injFifoFramesLoc[ core*k_injecting_directions+direction ] = 0 ; + dma_tcp->injFifoFramesIds[ core*k_injecting_directions+direction ] = core*k_injecting_directions+direction ; + } + dma_tcp->injFifoFramesMap[ core*k_injecting_directions+0 ] = 0x80; /* Set deterministic injection FIFO per direction */ + dma_tcp->injFifoFramesMap[ core*k_injecting_directions+1 ] = 0x40; /* Set deterministic injection FIFO per direction */ + dma_tcp->injFifoFramesMap[ core*k_injecting_directions+2 ] = 0x20; /* Set deterministic injection FIFO per direction */ + dma_tcp->injFifoFramesMap[ core*k_injecting_directions+3 ] = 0x08; /* Set deterministic injection FIFO per direction */ + 
dma_tcp->injFifoFramesMap[ core*k_injecting_directions+4 ] = 0x04; /* Set deterministic injection FIFO per direction */ + dma_tcp->injFifoFramesMap[ core*k_injecting_directions+5 ] = 0x02; /* Set deterministic injection FIFO per direction */ + dma_tcp->injFifoFramesMap[ core*k_injecting_directions+6 ] = 0x11; /* Set 'high priority' FIFO for taxi channel */ + dma_tcp->injFifoFramesPri[ core*k_injecting_directions+k_injecting_directions-1 ] = 1 ; // 'high priority' for taxi channel +/* dma_tcp->injFifoFramesMap[ core*k_injecting_directions+6 ] = 0xee; // Set any FIFO for taxi channel */ +#if defined(USE_SKB_TO_SKB) + dma_tcp->injFifoFramesMap[ core*k_injecting_directions+7 ] = 0x11; /* Set 'high priority' FIFO for propose/accept channel */ +/* dma_tcp->injFifoFramesMap[ core*k_injecting_directions+7 ] = 0xee; // propose/accept channel can go in any fifo, but regular pri */ + dma_tcp->injFifoFramesPri[ core*k_injecting_directions+7 ] = 1 ; // 'high priority' for propose/accept channel +#endif + } + } + { + int ret = DMA_InjFifoGroupAllocate( k_InjectionFifoGroupFrames, + k_injecting_cores*k_injecting_directions, /* num inj fifos */ + dma_tcp->injFifoFramesIds, + dma_tcp->injFifoFramesPri, + dma_tcp->injFifoFramesLoc, + dma_tcp->injFifoFramesMap, + NULL, + NULL, + NULL, + NULL, + NULL, + & dma_tcp->injFifoGroupFrames ); + + TRACEN(k_t_general,"(=)DMA_InjFifoGroupAllocate rc=%d", ret ); + } + + { + int core ; + int direction ; + for( core=0; core< k_injecting_cores; core += 1 ) + { + for( direction=0; direction< k_injecting_directions; direction += 1 ) + { + int ret = DMA_InjFifoInitById( &dma_tcp->injFifoGroupFrames, + dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction], + dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo, + dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo, /* head */ + dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo+1 /* end */ + ); + 
dma_tcp->idma.idma_core[core].idma_direction[direction].fifo_initial_head = + (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ; + TRACEN(k_t_general,"(=)DMA_InjFifoInitById rc=%d initial_head=0x%08x", ret , dma_tcp->idma.idma_core[core].idma_direction[direction].fifo_initial_head); + } + } + } + /* register receive functions for the memfifo packets */ + dma_tcp->proto_issue_frames_single=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDataSingleActor, dma_tcp, 0, 0); +#if defined(USE_ADAPTIVE_ROUTING) + dma_tcp->proto_issue_frames_adaptive=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDataAdaptiveActor, dma_tcp, 0, 0); +#endif + +#if defined(USE_SKB_TO_SKB) + dma_tcp->proto_transfer_propose=DMA_RecFifoRegisterRecvFunction(issuePropActor, dma_tcp, 0, 0); + /* If we want to start up with everything flowing through the reception FIFO , do this by setting the 'eager limit' longer than the largest IP frame */ + dma_tcp->eager_limit = k_force_eager_flow ? 10000000 : 1024 ; /* Frames smaller than this get sent through the FIFO rather than the DMA (set it above 65536 to run everything through receive FIFO) */ + balancer_init(&dma_tcp->balancer) ; +#endif + dma_tcp_diagnose_init(dma_tcp) ; + TRACEN(k_t_general,"(=)DMA_RecFifoRegisterRecvFunction proto_issue_frames_single=%d", + dma_tcp->proto_issue_frames_single); + } diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_quads.h b/drivers/net/bgp_torus/bgp_dma_tcp_quads.h new file mode 100644 index 00000000000000..ecc4815a6641f8 --- /dev/null +++ b/drivers/net/bgp_torus/bgp_dma_tcp_quads.h @@ -0,0 +1,394 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: Quadword ops for copying data, in particular torus-packet-sized + * (240 byte) sequences. Not currently used, but provided for + * reference. + * + * + ********************************************************************/ +#ifndef __BGP_DMA_TCP_QUADS_H__ +#define __BGP_DMA_TCP_QUADS_H__ + +/* TODO: take away the use of FP regs, now that software FIFO frames are 'rare', so we can avoid FP-in-kernel */ +/* Drop 240 bytes of payload from regs into 'software FIFO' */ +static inline void torus_frame_payload_store( + void * payloadptr) + { + unsigned int index1 ; + unsigned int index2 ; + torus_frame_payload *payload=payloadptr ; + + TRACEN(k_t_detail, "torus_payload_store payload=%p",payload) ; + asm ( + "li %[index1],16 \n\t" /* Indexing values */ + "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li %[index2],32 \n\t" /* Indexing values */ + "stfpdx 2,%[index1],%[payload] \n\t" /* F2=Q2 load */ + "li %[index1],48 \n\t" /* Indexing values */ + "stfpdx 3,%[index2],%[payload] \n\t" /* F3=Q3 load */ + "li %[index2],64 \n\t" /* Indexing values */ + "stfpdx 4,%[index1],%[payload] \n\t" /* F4=Q4 load */ + "li %[index1],80 \n\t" /* Indexing values */ + "stfpdx 5,%[index2],%[payload] \n\t" /* F5=Q5 load */ + "li %[index2],96 \n\t" /* 
Indexing values */ + "stfpdx 6,%[index1],%[payload] \n\t" /* F6=Q6 load */ + "li %[index1],112 \n\t" /* Indexing values */ + "stfpdx 7,%[index2],%[payload] \n\t" /* F7=Q7 load */ + "li %[index2],128 \n\t" /* Indexing values */ + "stfpdx 8,%[index1],%[payload] \n\t" /* F8=Q8 load */ + "li %[index1],144 \n\t" /* Indexing values */ + "stfpdx 9,%[index2],%[payload] \n\t" /* F9=Q9 load */ + "li %[index2],160 \n\t" /* Indexing values */ + "stfpdx 10,%[index1],%[payload] \n\t" /* F0=Q10 load */ + "li %[index1],176 \n\t" /* Indexing values */ + "stfpdx 11,%[index2],%[payload] \n\t" /* F1=Q11 load */ + "li %[index2],192 \n\t" /* Indexing values */ + "stfpdx 12,%[index1],%[payload] \n\t" /* F2=Q12 load */ + "li %[index1],208 \n\t" /* Indexing values */ + "stfpdx 13,%[index2],%[payload] \n\t" /* F3=Q13 load */ + "li %[index2],224 \n\t" /* Indexing values */ + "stfpdx 14,%[index1],%[payload] \n\t" /* F4=Q14 load */ + "stfpdx 15,%[index2],%[payload] \n\t" /* F3=Q15load */ + : /* outputs */ + "=m" (*payload), + [index1] "=&b" (index1), + [index2] "=&b" (index2) + : /* Inputs */ + [payload] "b" (payload) /* inputs */ + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", "fr15" + ); + } + +/* Load 240 bytes of payload from memory into regs */ +static inline void torus_frame_payload_load( + void * payloadptr) + { + unsigned int index1 ; + unsigned int index2 ; + torus_frame_payload *payload=payloadptr ; + + TRACEN(k_t_detail, "torus_payload_load payload=%p",payload) ; + asm ( + "li %[index1],16 \n\t" /* Indexing values */ + "lfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li %[index2],32 \n\t" /* Indexing values */ + "lfpdx 2,%[index1],%[payload] \n\t" /* F2=Q2 load */ + "li %[index1],48 \n\t" /* Indexing values */ + "lfpdx 3,%[index2],%[payload] \n\t" /* F3=Q3 load */ + "li %[index2],64 \n\t" /* Indexing values */ + "lfpdx 4,%[index1],%[payload] \n\t" /* F4=Q4 load */ + "li 
%[index1],80 \n\t" /* Indexing values */ + "lfpdx 5,%[index2],%[payload] \n\t" /* F5=Q5 load */ + "li %[index2],96 \n\t" /* Indexing values */ + "lfpdx 6,%[index1],%[payload] \n\t" /* F6=Q6 load */ + "li %[index1],112 \n\t" /* Indexing values */ + "lfpdx 7,%[index2],%[payload] \n\t" /* F7=Q7 load */ + "li %[index2],128 \n\t" /* Indexing values */ + "lfpdx 8,%[index1],%[payload] \n\t" /* F8=Q8 load */ + "li %[index1],144 \n\t" /* Indexing values */ + "lfpdx 9,%[index2],%[payload] \n\t" /* F9=Q9 load */ + "li %[index2],160 \n\t" /* Indexing values */ + "lfpdx 10,%[index1],%[payload] \n\t" /* F0=Q10 load */ + "li %[index1],176 \n\t" /* Indexing values */ + "lfpdx 11,%[index2],%[payload] \n\t" /* F1=Q11 load */ + "li %[index2],192 \n\t" /* Indexing values */ + "lfpdx 12,%[index1],%[payload] \n\t" /* F2=Q12 load */ + "li %[index1],208 \n\t" /* Indexing values */ + "lfpdx 13,%[index2],%[payload] \n\t" /* F3=Q13 load */ + "li %[index2],224 \n\t" /* Indexing values */ + "lfpdx 14,%[index1],%[payload] \n\t" /* F4=Q14 load */ + "lfpdx 15,%[index2],%[payload] \n\t" /* F3=Q15 load */ + : /* outputs */ + "=m" (*payload), + [index1] "=&b" (index1), + [index2] "=&b" (index2) + : /* Inputs */ + [payload] "b" (payload) /* inputs */ + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", "fr15" + ); + } + +static inline int torus_frame_payload_memcpy_base( + torus_frame_payload * target, + torus_frame_payload * source + ) + { + unsigned int index1 ; + unsigned int index2 ; + + TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ; + asm ( + "li %[index1],16 \n\t" /* Indexing values */ + "lfpdx 1,0,%[source] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li %[index2],32 \n\t" /* Indexing values */ + "lfpdx 2,%[index1],%[source] \n\t" /* F2=Q2 load */ + "li %[index1],48 \n\t" /* Indexing values */ + "lfpdx 3,%[index2],%[source] \n\t" /* F3=Q3 load */ + "li %[index2],64 \n\t" /* 
Indexing values */ + "lfpdx 4,%[index1],%[source] \n\t" /* F4=Q4 load */ + "li %[index1],80 \n\t" /* Indexing values */ + "lfpdx 5,%[index2],%[source] \n\t" /* F5=Q5 load */ + "li %[index2],96 \n\t" /* Indexing values */ + "lfpdx 6,%[index1],%[source] \n\t" /* F6=Q6 load */ + "li %[index1],112 \n\t" /* Indexing values */ + "lfpdx 7,%[index2],%[source] \n\t" /* F7=Q7 load */ + "li %[index2],128 \n\t" /* Indexing values */ + "lfpdx 8,%[index1],%[source] \n\t" /* F8=Q8 load */ + "li %[index1],144 \n\t" /* Indexing values */ + "lfpdx 9,%[index2],%[source] \n\t" /* F9=Q9 load */ + "li %[index2],160 \n\t" /* Indexing values */ + "lfpdx 10,%[index1],%[source] \n\t" /* F0=Q10 load */ + "li %[index1],176 \n\t" /* Indexing values */ + "lfpdx 11,%[index2],%[source] \n\t" /* F1=Q11 load */ + "li %[index2],192 \n\t" /* Indexing values */ + "lfpdx 12,%[index1],%[source] \n\t" /* F2=Q12 load */ + "li %[index1],208 \n\t" /* Indexing values */ + "lfpdx 13,%[index2],%[source] \n\t" /* F3=Q13 load */ + "li %[index2],224 \n\t" /* Indexing values */ + "lfpdx 14,%[index1],%[source] \n\t" /* F4=Q14 load */ + "lfpdx 15,%[index2],%[source] \n\t" /* F3=Q15 load */ + "li %[index1],16 \n\t" /* Indexing values */ + "stfpdx 1,0,%[target] \n\t" /* F1=Q1 load from (%[remaining_quads]) */ + "li %[index2],32 \n\t" /* Indexing values */ + "stfpdx 2,%[index1],%[target] \n\t" /* F2=Q2 load */ + "li %[index1],48 \n\t" /* Indexing values */ + "stfpdx 3,%[index2],%[target] \n\t" /* F3=Q3 load */ + "li %[index2],64 \n\t" /* Indexing values */ + "stfpdx 4,%[index1],%[target] \n\t" /* F4=Q4 load */ + "li %[index1],80 \n\t" /* Indexing values */ + "stfpdx 5,%[index2],%[target] \n\t" /* F5=Q5 load */ + "li %[index2],96 \n\t" /* Indexing values */ + "stfpdx 6,%[index1],%[target] \n\t" /* F6=Q6 load */ + "li %[index1],112 \n\t" /* Indexing values */ + "stfpdx 7,%[index2],%[target] \n\t" /* F7=Q7 load */ + "li %[index2],128 \n\t" /* Indexing values */ + "stfpdx 8,%[index1],%[target] \n\t" /* F8=Q8 load */ + "li 
%[index1],144 \n\t" /* Indexing values */ + "stfpdx 9,%[index2],%[target] \n\t" /* F9=Q9 load */ + "li %[index2],160 \n\t" /* Indexing values */ + "stfpdx 10,%[index1],%[target] \n\t" /* F0=Q10 load */ + "li %[index1],176 \n\t" /* Indexing values */ + "stfpdx 11,%[index2],%[target] \n\t" /* F1=Q11 load */ + "li %[index2],192 \n\t" /* Indexing values */ + "stfpdx 12,%[index1],%[target] \n\t" /* F2=Q12 load */ + "li %[index1],208 \n\t" /* Indexing values */ + "stfpdx 13,%[index2],%[target] \n\t" /* F3=Q13 load */ + "li %[index2],224 \n\t" /* Indexing values */ + "stfpdx 14,%[index1],%[target] \n\t" /* F4=Q14 load */ + "stfpdx 15,%[index2],%[target] \n\t" /* F3=Q15load */ + : /* outputs */ + "=m" (*target), + [index1] "=&b" (index1), + [index2] "=&b" (index2) + : /* Inputs */ + [source] "b" (source), /* inputs */ + [target] "b" (target) /* inputs */ + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14", "fr15" + ); + return 0 ; + } +#define loadreg(Reg,Name,Offset) \ + "li %[index]," #Offset " \n\t" \ + "lfpdx " #Reg ",%[index],%[" #Name "] \n\t" + +#define savereg(Reg,Name,Offset) \ + "li %[index]," #Offset " \n\t" \ + "stfpdx " #Reg ",%[index],%[" #Name "] \n\t" + + +static inline int torus_frame_payload_memcpy( + torus_frame_payload * target, + torus_frame_payload * source + ) + { + unsigned int index ; + + TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ; + asm ( + loadreg(0,source,0x00) + loadreg(1,source,0x10) + loadreg(2,source,0x20) + loadreg(3,source,0x30) + loadreg(4,source,0x40) + loadreg(5,source,0x50) + loadreg(6,source,0x60) + loadreg(7,source,0x70) + loadreg(8,source,0x80) + loadreg(9,source,0x90) + loadreg(10,source,0xa0) + loadreg(11,source,0xb0) + loadreg(12,source,0xc0) + loadreg(13,source,0xd0) + loadreg(14,source,0xe0) + savereg(0,target,0x00) + savereg(1,target,0x10) + savereg(2,target,0x20) + savereg(3,target,0x30) + 
savereg(4,target,0x40) + savereg(5,target,0x50) + savereg(6,target,0x60) + savereg(7,target,0x70) + savereg(8,target,0x80) + savereg(9,target,0x90) + savereg(10,target,0xa0) + savereg(11,target,0xb0) + savereg(12,target,0xc0) + loadreg(0,source,0xf0) /* Speculate that we will need this soon */ + savereg(13,target,0xd0) + loadreg(1,source,0x110) /* Speculate that we will need this soon */ + savereg(14,target,0xe0) + loadreg(2,source,0x130) /* Speculate that we will need this soon */ + + : /* outputs */ + "=m" (*target), + [index] "=&b" (index) + : /* Inputs */ + [source] "b" (source), /* inputs */ + [target] "b" (target) /* inputs */ + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14" + ); + return 0 ; + } + +static inline int torus_frame_payload_memcpy_try1( + torus_frame_payload * target, + torus_frame_payload * source + ) + { + unsigned int index ; + + TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ; + asm ( + loadreg(0,source,0x00) + loadreg(2,source,0x20) + loadreg(4,source,0x40) + loadreg(1,source,0x10) + savereg(0,target,0x00) + loadreg(6,source,0x60) + savereg(2,target,0x20) + loadreg(3,source,0x30) + savereg(4,target,0x40) + loadreg(8,source,0x80) + savereg(1,target,0x10) + loadreg(5,source,0x50) + savereg(6,target,0x60) + loadreg(10,source,0xa0) + savereg(3,target,0x30) + loadreg(7,source,0x70) + savereg(8,target,0x80) + loadreg(12,source,0xc0) + savereg(5,target,0x50) + loadreg(9,source,0x90) + savereg(10,target,0xa0) + loadreg(14,source,0xe0) + savereg(7,target,0x70) + loadreg(11,source,0xb0) + savereg(12,target,0xc0) + loadreg(13,source,0xd0) + savereg(9,target,0x90) + savereg(14,target,0xe0) + savereg(11,target,0xb0) + savereg(13,target,0xd0) + + : /* outputs */ + "=m" (*target), + [index] "=&b" (index) + : /* Inputs */ + [source] "b" (source), /* inputs */ + [target] "b" (target) /* inputs */ + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", 
"fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14" + ); + return 0 ; + } + +static inline int torus_frame_payload_memcpy_try2( + torus_frame_payload * target, + torus_frame_payload * source + ) + { + unsigned int index ; + + TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ; + asm ( + loadreg(0,source,0x00) + loadreg(1,source,0x10) + loadreg(2,source,0x20) + loadreg(4,source,0x40) + savereg(0,target,0x00) + loadreg(6,source,0x60) + savereg(2,target,0x20) + loadreg(3,source,0x30) + savereg(4,target,0x40) + loadreg(8,source,0x80) + savereg(1,target,0x10) + loadreg(5,source,0x50) + savereg(6,target,0x60) + loadreg(10,source,0xa0) + savereg(3,target,0x30) + loadreg(7,source,0x70) + savereg(8,target,0x80) + loadreg(12,source,0xc0) + savereg(5,target,0x50) + loadreg(9,source,0x90) + savereg(10,target,0xa0) + loadreg(14,source,0xe0) + savereg(7,target,0x70) + loadreg(11,source,0xb0) + savereg(12,target,0xc0) + loadreg(13,source,0xd0) + savereg(9,target,0x90) + savereg(14,target,0xe0) + savereg(11,target,0xb0) + savereg(13,target,0xd0) + + : /* outputs */ + "=m" (*target), + [index] "=&b" (index) + : /* Inputs */ + [source] "b" (source), /* inputs */ + [target] "b" (target) /* inputs */ + : "fr0", "fr1", "fr2", /* Clobbers */ + "fr3", "fr4", "fr5", + "fr6", "fr7", "fr8", + "fr9", "fr10", "fr11", + "fr12","fr13", "fr14" + ); + return 0 ; + } +#endif diff --git a/drivers/net/bgp_torus/bgp_fpu_memcpy.c b/drivers/net/bgp_torus/bgp_fpu_memcpy.c new file mode 100644 index 00000000000000..8b60a213e7c5cf --- /dev/null +++ b/drivers/net/bgp_torus/bgp_fpu_memcpy.c @@ -0,0 +1,825 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * + * Description: Blue Gene/P low-level driver for copy_tofrom_user thorough the + * parallel floating point unit + * + * + * + ********************************************************************/ +#define REQUIRES_DUMPMEM + +#include <linux/version.h> +#include <linux/module.h> + +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/highmem.h> +#include <linux/mman.h> +#include <linux/syscalls.h> +#include <linux/pagemap.h> + + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/time.h> +#include <asm/bitops.h> +#include <asm/time.h> + +#include "../bgp_network/bgp_net_traceflags.h" +#include <common/bgp_bitnumbers.h> +#include "bgp_bic_diagnosis.h" +#include "../bgp_network/bgdiagnose.h" +#include "../bgp_network/450_tlb.h" +/* Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */ +#define COMPILED_TRACEMASK (0xffffffff) +/* 
#define COMPILED_TRACEMASK (k_t_error) */ + +#include <linux/KernelFxLog.h> + +#if defined(CONFIG_BLUEGENE_TORUS_TRACE) +extern int bgp_dma_tcp_tracemask ; +#define TRACEN(i,x...) KernelFxLog(bgp_dma_tcp_tracemask & (COMPILED_TRACEMASK & (i)),x) +#else +#define TRACEN(i,x...) +#endif + +#include "bgp_memcpy.h" + +struct ctl_table bgp_memcpy_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "use_dma", + .data = &bgp_memcpy_control.use_dma, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "verify_fpu", + .data = &bgp_memcpy_control.verify_fpu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "verify_dma", + .data = &bgp_memcpy_control.verify_dma, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "use_fpu", + .data = &bgp_memcpy_control.use_fpu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "dma_threshold", + .data = &bgp_memcpy_control.dma_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fpu_threshold", + .data = &bgp_memcpy_control.fpu_threshold, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "faults_until_disable", + .data = &bgp_memcpy_control.faults_until_disable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "cycles_per_packet", + .data = &bgp_memcpy_control.cycles_per_packet, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "rate_observe_report_count", + .data = &bgp_memcpy_control.rate_observe_report_count, + .maxlen = 
sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "handle_pagecrossing", + .data = &bgp_memcpy_control.handle_pagecrossing, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fpu_handle_pagecrossing_read", + .data = &bgp_memcpy_control.fpu_handle_pagecrossing_read, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "fpu_handle_pagecrossing_write", + .data = &bgp_memcpy_control.fpu_handle_pagecrossing_write, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "mask", + .data = &bgp_memcpy_control.mask, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "assist_active", + .data = &bgp_memcpy_control.assist_active, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { + .ctl_name = CTL_UNNUMBERED, + .procname = "statistics", + .data = &bgp_dma_memcpy_statistics, + .maxlen = k_copy_statistics*sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + } , + { 0 }, +} ; + +static struct ctl_path memcpy_ctl_path[] = { + { .procname = "bgp", .ctl_name = 0, }, + { .procname = "copy", .ctl_name = 0, }, + { }, +}; +bgp_memcpy_control_t bgp_memcpy_control = + { + .use_dma = 0 , + .use_fpu = 1 , + .dma_threshold = 10000 , + .fpu_threshold = 512 , + .verify_dma = 0 , + .verify_fpu = 0 , + .cycles_per_packet = 20 , + .rate_observe_report_count = 0xffffffff , + .faults_until_disable = 1 , + .handle_pagecrossing = 1 , + .fpu_handle_pagecrossing_read = 0 , + .fpu_handle_pagecrossing_write = 0 , + .mask = 0 , + .assist_active = 0 + }; + +unsigned int bgp_dma_memcpy_statistics[k_copy_statistics] ; + + +static void cause_fallback(void) +{ + TRACEN(k_t_request,"Turning off DH memcpy") 
; + bgp_memcpy_control.use_fpu = 0 ; + dma_memcpy_statistic(k_copy_cause_fallback) ; +} +enum { + k_page_shift = PAGE_SHIFT , + k_page_size = 1 << k_page_shift , + k_page_offset_mask = k_page_size-1 , + k_fpu_alignment = 16 , + k_fpu_align_mask = k_fpu_alignment - 1 +}; + +enum { + k_diag_not_mapped=0 +/* k_diagnose=1 */ +}; + +enum { + k_exploit_doublehummer = 1, + k_verify_doublehummer = 1, + k_fixup_faulty_memcpy=1, + k_premark=0 , + k_map_write_check=0 , + k_map_read_check=0 , + k_disable_after_too_many_faults=1 , + k_inhibit_crosspage_write = 1 , // Set this if you want to not handle writes which cross a user-space page boundary + k_inhibit_crosspage_read = 1 // Set this if you want to not handle reads which cross a user-space page boundary +}; +static void report_faulty_memcpy(void * dest, const void * src, unsigned long size) +{ + unsigned int * di = (unsigned int *) dest ; + const unsigned int * si = (const unsigned int *) src ; + unsigned char * dc = (unsigned char *) (dest) ; + const unsigned char * sc = (const unsigned char *) (src) ; + unsigned int x ; + unsigned int faultwordcount = 0 ; + if( k_disable_after_too_many_faults) + { + int faults_to_go=bgp_memcpy_control.faults_until_disable-1 ; + if( faults_to_go <= 0 ) + { + cause_fallback() ; + } + else + { + bgp_memcpy_control.faults_until_disable=faults_to_go ; + } + } + dma_memcpy_statistic(k_copy_verify_miscompares) ; + TRACEN(k_t_error,"dest=%p src=%p size=0x%08lx",dest,src,size) ; + for(x=0;x<size/sizeof(unsigned int);x+=1) + { + if( di[x] != si[x] ) + { + TRACEN(k_t_error,"(E) x=0x%08x di+x=%p si+x=%p di[x]=0x%08x si[x]=0x%08x", + x,di+x,si+x,di[x],si[x]) ; + if( k_fixup_faulty_memcpy) di[x]=si[x] ; + faultwordcount += 1 ; + } + } + if( dc[size-3] != sc[size-3]) + { + TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x", + size-3,dc+size-3,sc+size-3,dc[size-3],sc[size-3]) ; + if( k_fixup_faulty_memcpy) dc[size-3]=sc[size-3] ; + } + if( dc[size-2] != sc[size-2]) + { + 
TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x", + size-2,dc+size-2,sc+size-2,dc[size-2],sc[size-2]) ; + if( k_fixup_faulty_memcpy) dc[size-2]=sc[size-2] ; + } + if( dc[size-1] != sc[size-1]) + { + TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x", + size-1,dc+size-1,sc+size-1,dc[size-1],sc[size-1]) ; + if( k_fixup_faulty_memcpy) dc[size-1]=sc[size-1] ; + } + TRACEN(k_t_error,"%d/%ld words incorrectly copied",faultwordcount,size/sizeof(unsigned int)) ; + +} +/* Check that a 'memcpy' was accurately done ... */ +static void verify_memcpy(void * dest, const void * src, unsigned long size) +{ + unsigned int * di = (unsigned int *) dest ; + const unsigned int * si = (const unsigned int *) src ; + unsigned char * dc = (unsigned char *) (dest) ; + const unsigned char * sc = (const unsigned char *) (src) ; + unsigned int q = di[0] ^ si[0] ; + unsigned int x ; + dma_memcpy_statistic(k_copy_verify_attempts) ; + TRACEN(k_t_fpucopy,"dest=%p src=%p size=0x%08lx di[0]=0x%08x si[0]=0x%08x",dest,src,size,di[0],si[0]) ; + for(x=1;x<size/sizeof(unsigned int);x+=1) + { + q |= di[x] ^ si[x] ; + } + q |= (dc[size-3] ^ sc[size-3]) |(dc[size-2] ^ sc[size-2]) |(dc[size-1] ^ sc[size-1]) ; + if(q) report_faulty_memcpy(dest,src,size) ; +} + +typedef struct { unsigned char c[128] ; } miniblock ; + +#define nl "\n" +/* Returns 0 for a good copy, 1 if an exception (unmapped storage) occurred */ +static int doublehummer_copy_unroll(void *to, const void *from, int count) +{ + int x1=0x10 ; + int x2=0x20 ; + int x3=0x30 ; + int x4=0x40 ; + int x5=0x50 ; + int x6=0x60 ; + int x7=0x70 ; + int x8=0x80 ; + int xa=0xa0 ; + int xc=0xc0 ; + int xe=0xe0 ; + int rc ; + asm volatile ( + "mtctr %[count]" nl + "100: lfpdx 0,0,%[src]" nl + "101: lfpdx 2,%[index2],%[src]" nl + "102: lfpdx 4,%[index4],%[src]" nl + "103: lfpdx 6,%[index6],%[src]" nl + "104: lfpdx 1,%[index1],%[src]" nl + "105: lfpdx 3,%[index3],%[src]" nl + "106: lfpdx 5,%[index5],%[src]" nl + 
"107: lfpdx 7,%[index7],%[src]" nl + "108: stfpdx 0,0 ,%[dst]" nl + "109: lfpdx 0,%[index8],%[src]" nl + "110: stfpdx 2,%[index2],%[dst]" nl + "111: lfpdx 2,%[indexa],%[src]" nl + "112: stfpdx 4,%[index4],%[dst]" nl + "113: lfpdx 4,%[indexc],%[src]" nl + "114: stfpdx 6,%[index6],%[dst]" nl + "115: lfpdx 6,%[indexe],%[src]" nl + "bdz 1f" nl + + "0:" nl + "addi %[src],%[src],128" nl + + "116: stfpdx 1,%[index1],%[dst]" nl + "117: lfpdx 1,%[index1],%[src]" nl + "118: stfpdx 0,%[index8],%[dst]" nl + "119: lfpdx 0,%[index8],%[src]" nl + + "120: stfpdx 3,%[index3],%[dst]" nl + "121: lfpdx 3,%[index3],%[src]" nl + "122: stfpdx 2,%[indexa],%[dst]" nl + "123: lfpdx 2,%[indexa],%[src]" nl + + "124: stfpdx 5,%[index5],%[dst]" nl + "125: lfpdx 5,%[index5],%[src]" nl + "126: stfpdx 4,%[indexc],%[dst]" nl + "127: lfpdx 4,%[indexc],%[src]" nl + + "128: stfpdx 7,%[index7],%[dst]" nl + "129: lfpdx 7,%[index7],%[src]" nl + "130: stfpdx 6,%[indexe],%[dst]" nl + "addi %[dst],%[dst],128" nl + "131: lfpdx 6,%[indexe],%[src]" nl + + "bdnz 0b" nl + + + "1:" nl + "addi %[src],%[src],128" nl + + "132: stfpdx 1,%[index1],%[dst]" nl + "133: lfpdx 1,%[index1],%[src]" nl + "134: stfpdx 0,%[index8],%[dst]" nl + + "135: stfpdx 3,%[index3],%[dst]" nl + "136: lfpdx 3,%[index3],%[src]" nl + "137: stfpdx 2,%[indexa],%[dst]" nl + + "138: stfpdx 5,%[index5],%[dst]" nl + "139: lfpdx 5,%[index5],%[src]" nl + "140: stfpdx 4,%[indexc],%[dst]" nl + + "141: stfpdx 7,%[index7],%[dst]" nl + "142: lfpdx 7,%[index7],%[src]" nl + "143: stfpdx 6,%[indexe],%[dst]" nl + + "addi %[dst],%[dst],128" nl + "144: stfpdx 1,%[index1],%[dst]" nl + "145: stfpdx 3,%[index3],%[dst]" nl + "146: stfpdx 5,%[index5],%[dst]" nl + "147: stfpdx 7,%[index7],%[dst]" nl +/* Following section needed to handle exceptions (user code passing addresses which SEGV) */ + "li %[rc],0" nl + "b 3f" nl + + "2:" nl + "li %[rc],1" nl + "3:" nl + ".section __ex_table,\"a\"" nl + + ".align 2" nl + ".long 100b,2b" nl + ".long 101b,2b" nl + ".long 
102b,2b" nl + ".long 103b,2b" nl + ".long 104b,2b" nl + ".long 105b,2b" nl + ".long 106b,2b" nl + ".long 107b,2b" nl + ".long 108b,2b" nl + ".long 109b,2b" nl + ".long 110b,2b" nl + ".long 111b,2b" nl + ".long 112b,2b" nl + ".long 113b,2b" nl + ".long 114b,2b" nl + ".long 115b,2b" nl + ".long 116b,2b" nl + ".long 117b,2b" nl + ".long 118b,2b" nl + ".long 119b,2b" nl + ".long 120b,2b" nl + ".long 121b,2b" nl + ".long 122b,2b" nl + ".long 123b,2b" nl + ".long 124b,2b" nl + ".long 125b,2b" nl + ".long 126b,2b" nl + ".long 127b,2b" nl + ".long 128b,2b" nl + ".long 129b,2b" nl + ".long 130b,2b" nl + ".long 131b,2b" nl + ".long 132b,2b" nl + ".long 133b,2b" nl + ".long 134b,2b" nl + ".long 135b,2b" nl + ".long 136b,2b" nl + ".long 137b,2b" nl + ".long 138b,2b" nl + ".long 139b,2b" nl + ".long 140b,2b" nl + ".long 141b,2b" nl + ".long 142b,2b" nl + ".long 143b,2b" nl + ".long 144b,2b" nl + ".long 145b,2b" nl + ".long 146b,2b" nl + ".long 147b,2b" nl + ".text" nl + + : /* Outputs */ + [rc] "=b" (rc) + : /* Inputs */ + [dst] "b" (to), + [src] "b" (from), + [count] "r" (count), + [index1] "b" (x1), + [index2] "b" (x2), + [index3] "b" (x3), + [index4] "b" (x4), + [index5] "b" (x5), + [index6] "b" (x6), + [index7] "b" (x7), + [index8] "b" (x8), + [indexa] "b" (xa), + [indexc] "b" (xc), + [indexe] "b" (xe) + : /* Clobbers */ + "memory", + "fr0","fr1","fr2","fr3", + "fr4","fr5","fr6","fr7" + ) ; + + return rc ; +} +static void doublehummer_store_quads(void *dest, int count, const double *v0, const double *v1) +{ + asm volatile ( + "mtctr %[count]" nl + "lfdx 0,0,%[v0]" nl + "lfsdx 0,0,%[v1]" nl + "0: stfpdx 0,0,%[dest]" nl + "addi %[dest],%[dest],16" nl + "bdnz 0b" nl + : /* Outputs */ + : /* Inputs */ + [dest] "b" (dest), + [v0] "b" (v0), + [v1] "b" (v1), + [count] "r" (count) + : /* Clobbers */ + "memory", + "fr0" + ) ; + +} + +/* Try a 'doublehummer' memcpy, return 0 if we could and 1 if we couldn't */ +static int doublehummer_memcpy(void * dest, const void * src, unsigned 
long size) +{ + if( k_exploit_doublehummer) + { + unsigned int di = (unsigned int) dest ; + unsigned int si = (unsigned int) src ; + unsigned int mutual_alignment = (di - si) & k_fpu_align_mask ; + unsigned int source_alignment = si & k_fpu_align_mask ; + unsigned int precopy_size = source_alignment ? (k_fpu_alignment - source_alignment) : 0 ; + unsigned int miniblock_di = di + precopy_size ; + unsigned int miniblock_si =si + precopy_size ; + unsigned int miniblock_size = size - precopy_size ; + unsigned int miniblock_count=miniblock_size/sizeof(miniblock) ; + unsigned int size_floor=miniblock_count*sizeof(miniblock) ; + unsigned int size_tail = size - size_floor - precopy_size ; + unsigned long flags ; + int rc ; + if( mutual_alignment ) + { + dma_memcpy_statistic(k_copy_unaligned_rejects) ; + return 1 ; // Alignment between source and destination not good enough + } + /* The source and dest are mutually aligned. Do we need a 1-15 byte pre-copy to get to quad alignment ? */ + if( precopy_size ) + { + rc = __real__copy_tofrom_user(dest, src, precopy_size) ; + if(rc) + { + dma_memcpy_statistic(k_precopy_segv_trap) ; + return 1 ; + } +/* memcpy(dest,src,precopy_size) ; */ + } + enable_kernel_fp() ; + +/* The copy should work with interrupts enabled, but whenever I tried it there were occasional errors in copying. */ +/* TODO: Diagnose why, fix, and run the copy without disabling. 
Same for the 'page copy' and 'page clear later */ + local_irq_save(flags) ; + rc = doublehummer_copy_unroll((void *)miniblock_di,(void *)miniblock_si,miniblock_count-1) ; + local_irq_restore(flags) ; + if( rc ) + { + dma_memcpy_statistic(k_copy_segv_trap) ; + return 1 ; + } + + if( size_tail ) + { + /* TODO: Fix up what happens if this causes a 'segv' */ + rc = __real__copy_tofrom_user((void *)(miniblock_di+size_floor), (void *)(miniblock_si+size_floor), size_tail) ; + if(rc) + { + dma_memcpy_statistic(k_postcopy_segv_trap) ; + return 1 ; + } +/* memcpy((void *)(miniblock_di+size_floor),(void *)(miniblock_si+size_floor),size_tail) ; */ + } + if( k_verify_doublehummer && bgp_memcpy_control.verify_fpu) + { + verify_memcpy(dest,src,size) ; + } + return 0 ; + } + else + { + return 1 ; + } +} + +static unsigned int operate_vcopy(unsigned long address, void * partner_vaddr, unsigned long size) +{ + TRACEN(k_t_detail,"address=0x%08lx partner_vaddr=%p size=0x%08lx",address,partner_vaddr,size) ; + return doublehummer_memcpy(partner_vaddr,(const void *)address,size) ; +} + + +static int all_pages_mapped_read(unsigned long address, unsigned long size) +{ + unsigned int start_page=(address >> k_page_shift) ; + unsigned int end_page=((address+size) >> k_page_shift) ; + unsigned int page_count = end_page-start_page+1 ; + unsigned int x ; + if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK + if( k_inhibit_crosspage_read && page_count > 1 && 0 == bgp_memcpy_control.fpu_handle_pagecrossing_read) + { + /* TODO: Should be able to handle page-crossings, but have seen kernel traps related to this */ + dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ; + return 1 ; + } + /* Defend against the possibility that the user application has posted an unmapped address */ + for(x=0;x<page_count;x+=1) + { + int pageInt ; + int __user * pageIntP = (int __user *) ((start_page+x) << k_page_shift) ; + if( get_user(pageInt,pageIntP) ) + { + 
TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ; + if( k_diag_not_mapped) + { + tlb_t t ; + unsigned int r=v_to_r_maybe((void *)address, &t) ; + TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ; + TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ; + diagnose_tlb(&t) ; + } + + return 1; + } + + } + return 0 ; +} +static int all_pages_mapped_write(unsigned long address, unsigned long size) +{ + unsigned int start_page=(address >> k_page_shift) ; + unsigned int end_page=((address+size) >> k_page_shift) ; + unsigned int page_count = end_page-start_page+1 ; + unsigned int x ; +/* int pageInt ; */ + char __user * pageCharP = (char __user *) address ; + if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK + if( k_inhibit_crosspage_write && page_count > 1 && 0 == bgp_memcpy_control.fpu_handle_pagecrossing_write ) + { + /* TODO: Should be able to handle page-crossings, but have seen kernel traps related to this */ + dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ; + return 1 ; + } + if(put_user(0,pageCharP)) + { + TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ; + if( k_diag_not_mapped) + { + tlb_t t ; + unsigned int r=v_to_r_maybe((void *)address, &t) ; + TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ; + TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ; + diagnose_tlb(&t) ; + } + + return 1; + } + /* Defend against the possibility that the user application has posted an unmapped address */ + for(x=1;x<page_count;x+=1) + { +/* int pageInt ; */ + char __user * pageCharP = (char __user *) ((start_page+x) << k_page_shift) ; +/* put_user(current_injection_used, report) ; */ + if( 
put_user(0,pageCharP) ) + { + TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ; + if( k_diag_not_mapped) + { + tlb_t t ; + unsigned int r=v_to_r_maybe((void *)address, &t) ; + TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ; + TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ; + diagnose_tlb(&t) ; + } + + return 1; + } + + } + return 0 ; +} + +static int instrument_copy_user_address_by_touch(unsigned long address, unsigned long size,void * partner_vaddr) +{ + + if( k_map_read_check && all_pages_mapped_read(address,size)) + { + dma_memcpy_statistic(k_copy_source_rejects) ; + return 1 ; + } + if( k_map_write_check && all_pages_mapped_write((unsigned int) partner_vaddr,size)) + { + dma_memcpy_statistic(k_copy_target_rejects) ; + return 1 ; + } + + /* Looks like we can run the transfer with the FPU */ + return operate_vcopy(address,partner_vaddr,size) ; + +} + +static int instrument_copy_tofrom_user(unsigned long to, unsigned long from, unsigned long size) +{ + + int rc=1 ; + TRACEN(k_t_fpucopy,"(>)") ; + /* TODO: Check by touching and poking that all pages in 'to' and 'from' are appropriately mapped, before going into the hummer loop */ + rc= instrument_copy_user_address_by_touch(from,size,(void *)to) ; + TRACEN(k_t_fpucopy,"(<) rc=%d",rc) ; + return rc ; +} + +enum { + k_enable_dma_memcpy = 1 // TODO: Get DMA memcopy working, and enable it here +}; +/* Returns 1 if we could DMA-copy things, 0 if we couldn't */ +extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void *to, + const void __user *from, unsigned long size) +{ + if( k_premark && bgp_memcpy_control.verify_dma) memset(to,0x11,size) ; // Mark the memory so we know if we write it +#if defined(CONFIG_BLUEGENE_DMA_MEMCPY) + if( k_enable_dma_memcpy && bgp_memcpy_control.use_dma) + { + if( bgp_memcpy_control.mask) + { + unsigned long 
flags ; + unsigned long rc ; + local_irq_save(flags) ; + rc = bgp_dma_instrument_copy_tofrom_user(to, from, size) ; + local_irq_restore(flags) ; + return rc ; + } + else + { + return bgp_dma_instrument_copy_tofrom_user(to, from, size) ; + } + } + else +#endif + { + dma_memcpy_statistic(k_copy_tofrom_user_calls) ; + if( size > 0 && bgp_memcpy_control.use_fpu && size >= bgp_memcpy_control.fpu_threshold ) + { + { + TRACEN(k_t_fpucopy,"to=%p from=%p size=0x%08lx",to,from,size) ; + { + unsigned long rc= instrument_copy_tofrom_user((unsigned long)to,(unsigned long)from,size) ; + dma_memcpy_statistic((0==rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ; + + return rc ; + } + + } + } + dma_memcpy_statistic(k_copy_size_rejects) ; + return 1 ; // Not copied, size under threshold + } +} + +#if defined(CONFIG_WRAP_COPY_TOFROM_USER) +void copy_page(void *to, void *from) +{ + TRACEN(k_t_fpucopy,"to=%p from=%p",to,from) ; + if(bgp_memcpy_control.assist_active ) + { + unsigned long flags ; + unsigned int miniblock_count = k_page_size / sizeof(miniblock) ; + enable_kernel_fp() ; + + local_irq_save(flags) ; + doublehummer_copy_unroll((void *)to,(void *)from,miniblock_count-1) ; + local_irq_restore(flags) ; + } + else + { + memcpy(to,from,k_page_size) ; + } + +} + +static const double v=0.0 ; +void clear_pages(void *p, int order) +{ + TRACEN(k_t_fpucopy,"p=%p order=%d",p,order) ; + if(bgp_memcpy_control.assist_active ) + { + unsigned int quadcount=(k_page_size/16) << order ; + unsigned long flags ; + enable_kernel_fp() ; +/* double v=0.0 ; */ + local_irq_save(flags) ; + doublehummer_store_quads(p,quadcount,&v,&v) ; + local_irq_restore(flags) ; + + + } + else + { + memset(p,0,k_page_size << order) ; + } + + +} +#endif + +void __init +bgp_fpu_register_memcpy_sysctl(void) +{ + register_sysctl_paths(memcpy_ctl_path,bgp_memcpy_table) ; + TRACEN(k_t_init, "memcpy sysctl registered") ; + +} diff --git a/drivers/net/bgp_torus/bgp_memcpy.h b/drivers/net/bgp_torus/bgp_memcpy.h 
new file mode 100644 index 00000000000000..b4aa80f32a30de --- /dev/null +++ b/drivers/net/bgp_torus/bgp_memcpy.h @@ -0,0 +1,204 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Author: Chris Ward <tjcw@uk.ibm.com> + * + * Description: Blue Gene low-level driver copy_tofrom_user using + * BlueGene-specific hardware + * + * + ********************************************************************/ +#ifndef __BGP_MEMCPY_H__ +#define __BGP_MEMCPY_H__ + + +typedef struct +{ + int use_dma ; + int use_fpu ; + int dma_threshold ; /* Use the BGP DMA unit for copy_tofrom_user this size or larger */ + int fpu_threshold ; /* Use the BGP FPU for copy_tofrom_user this size or larger */ + int verify_dma ; /* Whether to verify the copy (for diagnostics) */ + int verify_fpu ; /* Whether to verify the copy (for diagnostics) */ + int cycles_per_packet ; /* Estimate of number of cycles per packet, for local spin before looking at counters */ + int faults_until_disable ; /* Number of faults until we disable acceleration */ + int rate_observe_report_count ; /* Number of times out of 256 that the rate gets displayed */ + int handle_pagecrossing ; /* Whether the DMA version should attempt to handle page-boundary-crossings */ + int fpu_handle_pagecrossing_read ; /* Whether the 
FPU version should attempt to handle page-boundary-crossings on reads */ + int fpu_handle_pagecrossing_write ; /* Whether the FPU version should attempt to handle page-boundary-crossings on writes */ + int mask ; /* Whether to mask interrupts */ + int assist_active ; /* Whether to assist copypage and clearpages */ + /* int trace_count ; */ /* Number of trace records to cut before stopping */ +} bgp_memcpy_control_t ; + +extern bgp_memcpy_control_t bgp_memcpy_control ; + +enum { + k_copy_tofrom_user_calls , + k_copy_cause_fallback , + k_copy_accelerate_successes , + k_copy_accelerate_rejects , + + k_copy_size_rejects , + k_copy_spanpage_rejects , + k_copy_crosspage_limitation_rejects , + k_copy_inconsistent_tlb_1_rejects , + + k_copy_inconsistent_tlb_2_rejects , + k_copy_no_counter_rejects , + k_copy_source_tlb_rejects , + k_copy_target_tlb_rejects , + + k_copy_source_rejects , + k_copy_target_rejects , + k_copy_unaligned_rejects , + k_copy_verify_attempts , + + k_copy_verify_miscompares , + k_copy_tlb_touches , + k_copy_await_idle_zero , + k_copy_await_idle_low , + + k_copy_await_idle_high , + k_copy_inconsistent_tlb_1_info , + k_copy_inconsistent_tlb_2_info , + k_copy_segv_trap , + + k_precopy_segv_trap , + k_postcopy_segv_trap , + + k_copy_statistics +}; + +/* The underlying assembler copy function, returns 0 iff it copies all the data */ +extern unsigned long __real__copy_tofrom_user(void *to, + const void __user *from, unsigned long size) ; + +extern unsigned int bgp_dma_memcpy_statistics[k_copy_statistics] ; +static inline void dma_memcpy_statistic(unsigned int X) +{ + bgp_dma_memcpy_statistics[X] += 1 ; +} + +extern unsigned long bgp_dma_instrument_copy_tofrom_user(void *to, + const void *from, unsigned long size) ; +extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void *to, + const void *from, unsigned long size) ; + +enum +{ + k_diagnose = 1 +}; +/* Items to record about a copy op, for diagnosing faults */ +typedef struct +{ + const void * vaddr ; + 
unsigned int tlb_v ; + unsigned int pageid ; + unsigned int xlat ; + unsigned int attrib ; +} tlb_t ; + +typedef struct +{ + void * to_vaddr ; + const void * from_vaddr ; + unsigned int size ; + tlb_t a_tlb ; + tlb_t b_tlb ; + unsigned int a_raddress ; + unsigned int b_raddress ; + unsigned int from_check_pre ; + unsigned int to_check_pre ; + unsigned int from_check_post ; + unsigned int to_check_post ; + unsigned int frag_index ; +} copy_op_t ; + +static void diagnose_tlb(tlb_t *t) +{ + unsigned int t0=t->pageid ; + unsigned int t1=t->xlat ; + unsigned int t2=t->attrib ; + TRACEN(k_t_request,"vaddr=%p tlb_v=0x%08x %08x-%08x-%08x ts=%d tid=0x%02x epn=0x%08x rpn=0x%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d uxwr=%d sxwr=%d", + t->vaddr,t->tlb_v,t0,t1,t2, + (t0 & TLB0_TS) ? 1 : 0, + (t2 >> 22) & 0xff , + TLB0_EPN_1K(t0), + TLB1_ERPN(t1),TLB1_RPN_1K(t1), + TLB_SIZES[(t0 & 0xF0) >> 4], + (t2 & TLB2_W) ? 1 : 0, + (t2 & TLB2_I) ? 1 : 0, + (t2 & TLB2_M) ? 1 : 0, + (t2 & TLB2_G) ? 1 : 0, + (t2 & TLB2_U0) ? 1 : 0, + (t2 & TLB2_U1) ? 1 : 0, + (t2 & TLB2_U2) ? 1 : 0, + (t2 & TLB2_U3) ? 1 : 0, + (t0 & TLB0_V) ? 
1 : 0, + (t2 >> 3) & 7, + t2 & 7 + ) ; +} +static void diagnose_faulty_copy(copy_op_t *c) __attribute__((unused)) ; +static void diagnose_faulty_copy(copy_op_t *c) +{ + TRACEN(k_t_request,"from_vaddr=%p to_vaddr=%p size=0x%08x a_raddress=0x%08x b_raddress=0x%08x from_check_pre=0x%08x to_check_pre=0x%08x from_check_post=0x%08x to_check_post=0x%08x frag_index=%d", + c->from_vaddr,c->to_vaddr,c->size,c->a_raddress,c->b_raddress,c->from_check_pre,c->from_check_post,c->to_check_pre,c->to_check_post,c->frag_index) ; + diagnose_tlb(&c->a_tlb) ; + diagnose_tlb(&c->b_tlb) ; +} + +/* Find the real store address for a virtual address, by looking at the TLB and causing a TLB miss if needed */ +static unsigned int v_to_r_maybe(const void * vaddr,tlb_t *t) +{ + unsigned int vaddr_int=(unsigned int)vaddr ; + int tlbx=search_tlb_v(vaddr_int) ; + int pageid=get_tlb_pageid(tlbx) ; + int xlat=get_tlb_xlat(tlbx) ; + int attrib=get_tlb_attrib(tlbx) ; + int tlbx1=search_tlb_v((unsigned int)vaddr) ; + if( k_diagnose) + { + t->vaddr = vaddr ; + t->tlb_v = tlbx1 ; + t->pageid = pageid ; + t->xlat = xlat ; + t->attrib = attrib ; + } + if( (tlbx == tlbx1) /* Translation didn't change under me due to e.g. 
interrupt */ + && ((pageid & TLB0_V) != 0) /* TLB is valid */ + && ((tlbx & 0x20000000) != 0) /* search_tlb_v sets this bit if it found a translation */ + ) + { + unsigned int epn = TLB0_EPN_1K(pageid) ; // virtual page for the TLB + unsigned int rpn = TLB1_RPN_1K(xlat) ; // real page for the TLB + unsigned int result = (vaddr_int-epn) + rpn ; + TRACEN(k_t_dmacopy,"vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x", + vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ; + return result ; + + } + else + { + TRACEN(k_t_dmacopy,"vaddr=%p tlbx=0x%08x pageid=0x%08x tlbx1=0x%08x unmapped", + vaddr,tlbx,pageid,tlbx1) ; + return (unsigned int) -1 ; // Not mapped + } +} + +#endif diff --git a/drivers/net/bgp_torus/bgtor.h b/drivers/net/bgp_torus/bgtor.h new file mode 100644 index 00000000000000..49bceff315efda --- /dev/null +++ b/drivers/net/bgp_torus/bgtor.h @@ -0,0 +1,310 @@ +/********************************************************************* + * + * Description: Torus definitions + * + * Copyright (c) 2007, 2008 International Business Machines + * Volkmar Uhlig <vuhlig@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + ********************************************************************/ +#ifndef __DRIVERS__BLUEGENE__TORUS_H__ +#define __DRIVERS__BLUEGENE__TORUS_H__ + +/* #include "bglink.h" */ +#include <linux/ioctl.h> + +#define TORUS_MAX_MTU (39 * 240) + +#define BGP_TORUS_MAX_IRQS 96 + +#define BGP_TORUS_GROUPS 4 +#define BGP_TORUS_DMA_SIZE (sizeof(struct torus_dma) * BGP_TORUS_GROUPS) + +#define BGP_TORUS_INJ_FIFOS 32 +#define BGP_TORUS_RCV_FIFOS 8 +#define BGP_TORUS_COUNTERS 64 +#define BGP_TORUS_DMA_REGIONS 8 + +#define BGP_TORUS_TX_ENTRIES 256 +#define BGP_TORUS_RX_ENTRIES 512 + +#define BGP_TORUS_USER_GROUP 1 + +/* IOCTLs for UL DMA */ +#define TORUS_IOCTL 'T' +#define TORUS_ALLOC_TX_COUNTER _IO(TORUS_IOCTL, 1) +#define TORUS_ALLOC_RX_COUNTER _IO(TORUS_IOCTL, 2) +#define TORUS_ALLOC_TX_FIFO _IO(TORUS_IOCTL, 3) +#define TORUS_ALLOC_RX_FIFO _IO(TORUS_IOCTL, 4) +#define TORUS_FREE_TX_COUNTER _IO(TORUS_IOCTL, 5) +#define TORUS_FREE_RX_COUNTER _IO(TORUS_IOCTL, 6) +#define TORUS_FREE_TX_FIFO _IO(TORUS_IOCTL, 7) +#define TORUS_FREE_RX_FIFO _IO(TORUS_IOCTL, 8) +#define TORUS_REGISTER_TX_MEM _IO(TORUS_IOCTL, 9) +#define TORUS_REGISTER_RX_MEM _IO(TORUS_IOCTL, 10) +#define TORUS_DMA_RANGECHECK _IO(TORUS_IOCTL, 11) + + +struct torus_fifo { + u32 start; + u32 end; + volatile u32 head; + volatile u32 tail; +}; + +struct torus_dma { + struct { + struct torus_fifo fifo[BGP_TORUS_INJ_FIFOS]; /* 0 - 1ff */ + u32 empty; /* 200 */ + u32 __unused0; /* 204 */ + u32 avail; /* 208 */ + u32 __unused1; /* 20c */ + u32 threshold; /* 210 */ + u32 __unused2; /* 214 */ + u32 clear_threshold; /* 218 */ + u32 __unused3; /* 21c */ + u32 dma_active; /* 220 */ + u32 dma_activate; /* 224 */ + u32 dma_deactivate; /* 228 */ + u8 __unused4[0x100-0x2c]; /* 22c - 2ff */ + + u32 counter_enabled[2]; /* 300 */ + u32 counter_enable[2]; /* 308 */ + u32 counter_disable[2]; /* 310 */ + u32 __unused5[2]; /* 318 */ + u32 counter_hit_zero[2]; /* 320 */ + u32 counter_clear_hit_zero[2]; /* 328 */ + u32 
counter_group_status; /* 330 */ + u8 __unused6[0x400-0x334]; /* 334 - 3ff */ + + struct { + u32 counter; + u32 increment; + u32 base; + u32 __unused; + } counter[BGP_TORUS_COUNTERS]; /* 400 - 7ff */ + } __attribute__((packed)) inj; + + struct { + struct torus_fifo fifo[BGP_TORUS_RCV_FIFOS]; /* 800 - 87f */ + struct torus_fifo hdrfifo; /* 880 - 88f */ + u8 __unused0[0x900-0x890]; /* 890 - 900 */ + + u32 glob_ints[16]; /* 900 - 93f */ + u8 __unused1[0xa00-0x940]; /* 940 - 9ff */ + + u32 empty[2]; /* a00 */ + u32 available[2]; /* a08 */ + u32 threshold[2]; /* a10 */ + u32 clear_threshold[2]; /* a18 */ + u8 __unused2[0xb00 - 0xa20]; /* a20 - aff */ + + u32 counter_enabled[2]; /* b00 */ + u32 counter_enable[2]; /* b08 */ + u32 counter_disable[2]; /* b10 */ + u32 __unused3[2]; /* b18 */ + u32 counter_hit_zero[2]; /* b20 */ + u32 counter_clear_hit_zero[2]; /* b28 */ + u32 counter_group_status; /* b30 */ + u8 __unused4[0xc00 - 0xb34]; /* b34 - bff */ + + struct { + u32 counter; + u32 increment; + u32 base; + u32 limit; + } counter[BGP_TORUS_COUNTERS]; /* c00 - fff */ + } __attribute__((packed)) rcv; +}; + +enum { + torus_dir_xplus = 0x20, + torus_dir_xminus = 0x10, + torus_dir_yplus = 0x08, + torus_dir_yminus = 0x04, + torus_dir_zplus = 0x02, + torus_dir_zminus = 0x01 +}; + +union torus_fifo_hw_header { + struct { + u32 csum_skip : 7; /* number of shorts to skip in chksum */ + u32 sk : 1; /* 0= use csum_skip, 1 skip pkt */ + u32 dirhint : 6; /* x-,x+,y-,y+,z-,z+ */ + u32 deposit : 1; /* multicast deposit */ + u32 pid0 : 1; /* destination fifo group MSb */ + u32 size : 3; /* size: (size + 1) * 32bytes */ + u32 pid1 : 1; /* destination fifo group LSb */ + u32 dma : 1; /* 1=DMA mode, 0=Fifo mode */ + u32 dyn_routing : 1; /* 1=dynamic routing, */ + /* 0=deterministic routing */ + u32 virt_channel : 2; /* channel (0=Dynamic CH0, */ + /* 1=Dynamic CH1, 2=Bubble, 3=Prio) */ + u32 dest_x : 8; + u32 dest_y : 8; + u32 dest_z : 8; + u32 reserved : 16; + }; + u8 raw8[8]; + u32 
raw32[2]; +} __attribute__((packed)); + +union torus_dma_hw_header { + struct { + u32 : 30; + u32 prefetch : 1; + u32 local_copy : 1; + u32 : 24; + u32 counter : 8; + u32 base; + u32 length; + }; + u32 raw32[2]; +} __attribute__((packed)); + +union torus_dma_sw_header { + struct { + u32 offset; + u8 counter_id; + u8 bytes; + u8 unused : 6; + u8 pacing : 1; + u8 remote_get : 1; + }; + u32 raw32[2]; +} __attribute__((packed)); + +union torus_inj_desc { + u32 raw32[8]; + struct { + union torus_dma_hw_header dma_hw; + union torus_fifo_hw_header fifo; + union torus_dma_sw_header dma_sw; + }; +} __attribute__((packed)); + +struct torus_tx_ring { + union torus_inj_desc *desc; + struct sk_buff **skbs; + u32 start; + unsigned int tail_idx, pending_idx; + unsigned counter; + phys_addr_t paddr; + spinlock_t lock; +}; + +union torus_source_id { + u32 raw; + atomic_t raw_atomic; + struct { + u32 conn_id : 8; + u32 src_key : 24; + }; +}; + +#define TORUS_SOURCE_ID_NULL (~0ul) /* anything that can't be a legitimate id */ + +union torus_rcv_desc { + u32 raw32[256 / sizeof(u32)]; + u8 raw8[256]; + struct { + union torus_fifo_hw_header fifo; + u32 counter; + union torus_source_id src_id; + u32 data[]; + }; +} __attribute__((packed)); + +struct torus_skb_cb { + union torus_source_id src_id; + u32 received_len; + u32 total_len; +}; + +struct torus_rx_ring { + union torus_rcv_desc *desc; + struct sk_buff_head skb_list; + u32 start; + unsigned int head_idx; + phys_addr_t paddr; + spinlock_t lock; + + /* bookkeeping for packet currently being reconstructed */ + union torus_source_id src_id; + u32 received_len; + u32 total_len; + struct sk_buff *skb; + + /* statistics */ + u32 dropped; + u32 delivered; +}; + +struct bg_torus { + u8 coordinates[3]; + u8 dimension[3]; + union torus_source_id source_id; + + spinlock_t lock; + struct torus_dma *dma; + + struct torus_tx_ring tx[BGP_TORUS_INJ_FIFOS * BGP_TORUS_GROUPS]; + struct torus_rx_ring rx[BGP_TORUS_RCV_FIFOS * BGP_TORUS_GROUPS]; + + /* 
mapping from counter to tx ring index */ + int inj_counter_to_txidx[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS]; + + /* counters used */ + unsigned long inj_counter_map[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS / + sizeof(unsigned long) / 8]; + unsigned long rcv_counter_map[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS / + sizeof(unsigned long) / 8]; + + /* fifos used */ + unsigned long inj_fifo_map[BGP_TORUS_INJ_FIFOS * BGP_TORUS_GROUPS / + sizeof(unsigned long) / 8 + 1]; + unsigned long rcv_fifo_map[BGP_TORUS_RCV_FIFOS * BGP_TORUS_GROUPS / + sizeof(unsigned long) / 8 + 1]; + + /* dma regions used */ + unsigned long inj_dma_region_map; + unsigned long rcv_dma_region_map; + + unsigned int dcr_base, dcr_size; + struct resource pdma, pfifo0, pfifo1; + int virq[BGP_TORUS_MAX_IRQS]; + + struct of_device *ofdev; + struct ctl_table_header *sysctl_header; +}; + + +extern inline void bgtorus_init_inj_desc(struct bg_torus *torus, + union torus_inj_desc *desc, + int len, u8 x, u8 y, u8 z) +{ + memset(desc, 0, sizeof(*desc)); + + desc->fifo.sk = 1; /* skip checksum */ + desc->fifo.size = 7; /* always full 240 bytes packets */ + desc->fifo.dyn_routing = 1; + desc->fifo.dest_x = x; + desc->fifo.dest_y = y; + desc->fifo.dest_z = z; + + desc->dma_hw.length = len; + + /* atomic { desc->dma_sw.raw32[1] = ++torus->source_id.conn_id; } */ + desc->dma_sw.raw32[1] = + atomic_add_return(1U << 24, &torus->source_id.raw_atomic); +} + +int bgtorus_xmit(struct bg_torus *torus, union torus_inj_desc *desc, + struct sk_buff *skb); + + +#endif /* !__DRIVERS__BLUEGENE__TORUS_H__ */ diff --git a/drivers/net/bgp_torus/bgtornic.c b/drivers/net/bgp_torus/bgtornic.c new file mode 100644 index 00000000000000..da3a9b2e871d79 --- /dev/null +++ b/drivers/net/bgp_torus/bgtornic.c @@ -0,0 +1,597 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 
2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. + * + * Authors: Chris Ward <tjcw@uk.ibm.com> + * Volkmar Uhlig <vuhlig@us.ibm.com> + * + * Description: Blue Gene driver exposing tree and torus as a NIC + * + * + ********************************************************************/ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/fcntl.h> +#include <linux/init.h> +#include <linux/poll.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/mm.h> +#include <linux/cdev.h> +#include <linux/proc_fs.h> +#include <linux/etherdevice.h> +#include <linux/tcp.h> +#include <linux/ip.h> + +#include <net/arp.h> + +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/bgp_personality.h> +#include <asm/bluegene.h> +#include <linux/KernelFxLog.h> + + +#include "bgtornic.h" + +int col_start_xmit(struct sk_buff *skb, struct net_device *dev); + +/* #define TRUST_TORUS_CRC */ + +#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI) +/* Select operation with linux 'dev->poll' */ +#define TORNIC_DEV_POLL +#endif + +/* #define TORNIC_TASKLET_BGNET */ + +/* #define TORNIC_TRANSMIT_TREE_TASKLET */ + +#include 
"../bgp_network/bgp_net_traceflags.h" + +#define ENABLE_TRACE + +/* #define REQUIRE_TRACE */ + +#if defined(ENABLE_TRACE) +extern int bgp_dma_tcp_tracemask ; +/* extern int bgtorus_debug_tracemask ; */ +#define bgtornic_debug_tracemask bgp_dma_tcp_tracemask +/* static int bgtornic_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */ +#endif + +#if defined(REQUIRE_TRACE) +#define TRACE(x...) KernelFxLog(1,x) +#define TRACE1(x...) KernelFxLog(1,x) +#define TRACE2(x...) KernelFxLog(1,x) +#define TRACEN(i,x...) KernelFxLog(1,x) +#define TRACED(x...) KernelFxLog(1,x) +#define TRACES(x...) KernelFxLog(1,x) +#elif defined(ENABLE_TRACE) +#define TRACE(x...) KernelFxLog(bgtornic_debug_tracemask & k_t_general,x) +#define TRACE1(x...) KernelFxLog(bgtornic_debug_tracemask & k_t_lowvol,x) +#define TRACE2(x...) KernelFxLog(bgtornic_debug_tracemask & k_t_detail,x) +#define TRACEN(i,x...) KernelFxLog(bgtornic_debug_tracemask & (i),x) +#define TRACED(x...) KernelFxLog(1,x) +#define TRACES(x...) KernelFxLog(1,x) +#else +#define TRACE(x...) +#define TRACE1(x...) +#define TRACE2(x...) +#define TRACEN(i,x...) +#define TRACED(x...) 
+#endif + +/* #define TORNIC_FORCE_BROADCAST 1 */ +/********************************************************************** + * defines + **********************************************************************/ + +static const char version[] = "Bgtornet: Version 1.0, (c) 2008,2010 IBM Corporation, GPL"; + +/********************************************************************** + * Linux module + **********************************************************************/ + +MODULE_DESCRIPTION("BlueGene Torus Ethernet driver"); +MODULE_LICENSE("GPL"); + + +int bgtornic_driverparm = 0 ; +int bgnet_receive_torus(struct sk_buff * skb) ; +void dma_tcp_poll_handler(void) ; +void dma_tcp_rx_enable(void) ; + +/* Diagnostic options */ +enum { + k_inhibit_scattergather = 0 , /* Whether to tell linux we cannot do 'scattergather' DMA TODO: test whether scattergathers actually work, using (e.g.) NFS */ + k_inhibit_gso = 1 /* Whether to tell linux not to try Generic Segmentation Offload ; not useful until I can get s-g working with multiple frags in a skb */ +}; + + +static void dumpmem(const void *address, unsigned int length, const char * label) __attribute__((unused)) ; +static void dumpmem(const void *address, unsigned int length, const char * label) + { + int x ; + TRACEN(k_t_fifocontents|k_t_scattergather,"Memory dump, length=0x%08x: %s",length,label) ; + if( length > 20*32 ) { + length = 20*32 ; + } + for (x=0;x<length;x+=32) + { + int *v = (int *)(address+x) ; + TRACEN(k_t_fifocontents|k_t_scattergather,"%p: %08x %08x %08x %08x %08x %08x %08x %08x", + v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7] + ) ; + } + } + + + +static BGP_Personality_t personality; +static struct net_device *static_dev ; + + +/* int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev) ; */ +int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev +/* ,unsigned int x, unsigned int y, unsigned int z */ + ) ; + +/********************************************************************** + * 
Linux' packet and skb management + **********************************************************************/ + +static int bgtornet_change_mtu(struct net_device *dev, int new_mtu) +{ +/* struct bgtornet_dev *bgtornet = netdev_priv(dev); */ + if (new_mtu < 60 || new_mtu > BGTORNET_MAX_MTU ) + return -EINVAL; + dev->mtu = new_mtu; +/* bgtree_set_mtu(bgtornet->tree, new_mtu) ; */ + return 0; +} + + +/* Take 2 bytes from every 16 to form a frame verifier */ +static unsigned int asf_frame_verifier(const char * data, unsigned int length) +{ + const unsigned int * data_int = (unsigned int *) data ; + unsigned int result = 0 ; + unsigned int index ; + for(index=0; index<length/sizeof(unsigned int);index += 4) + { + result += data_int[index] ; + } + return result & 0xffff ; +} + +static int bgtornet_receive(struct sk_buff *skb, struct bglink_hdr *lnkhdr, + struct bglink_proto* proto) +{ + struct net_device *dev = (struct net_device*)proto->private; + struct bgtornet_dev *bgtornet = netdev_priv(dev); + + TRACE("bgtornet rcvd pkt: data=%p, len=%d, head=%d, tail=%d, res len=%d", + skb->data, skb->len, lnkhdr->opt_eth.pad_head, + lnkhdr->opt_eth.pad_tail, skb->len - lnkhdr->opt_eth.pad_head - lnkhdr->opt_eth.pad_tail); + + + /* skb_pull and trim check for over/underruns. 
For 0 size the + * add/subtract is the same as a test */ + __skb_pull(skb, lnkhdr->opt_eth.pad_head); + __skb_trim(skb, skb->len - lnkhdr->opt_eth.pad_tail); + + + +/* dumpmem(skb->data,skb->len,"Frame delivered via torus") ; */ + + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + + TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; + netif_rx(skb); + + + dev->last_rx = jiffies; + bgtornet->stats.rx_packets++; + bgtornet->stats.rx_bytes += skb->len; + + return 0; +} + +void bgtornet_rx_schedule(void) + { + TRACEN(k_t_general,"(>) bgtornet_rx_schedule") ; + { + struct net_device *dev = static_dev; + struct bgtornet_dev *bgtornet = netdev_priv(dev); + TRACEN(k_t_napi,"netif_rx_schedule(dev=%p,napi=%p)",dev,&bgtornet->napi) ; + napi_schedule(&bgtornet->napi) ; + } + TRACEN(k_t_general,"(<) bgtornet_rx_schedule") ; + } + +struct net_device_stats *bgtornet_stats(void) + { + struct net_device *dev = static_dev; + struct bgtornet_dev *bgtornet = netdev_priv(dev); + return &bgtornet->stats ; + } + +static int frame_passes_verification(struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + unsigned int eth_proto = eth->h_proto ; + struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ; + unsigned int iphlen = 4*iph->ihl ; + struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) ); + unsigned int ip_proto = iph->protocol ; + if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP ) + { + unsigned int tcphlen = 4*tcph->doff ; + char * payload = ((char *)(tcph)) + (tcphlen) ; + unsigned int payload_len=iph->tot_len-iphlen-tcphlen ; + unsigned int framecheck = asf_frame_verifier(payload,payload_len) ; + unsigned int rcvcheck = tcph->check ; + TRACEN(k_t_general, "framecheck=0x%08x rcvcheck=0x%08x", + framecheck, rcvcheck + ) ; + if( framecheck != rcvcheck) + { + TRACEN(k_t_request,"(!!!) 
frame verify fails, framecheck=0x%08x rcvcheck=0x%08x payload_len=%d", + framecheck, + rcvcheck, + payload_len) ; + return 0 ; + } + } + return 1 ; +} + +static inline void deliver_frame(struct sk_buff *skb) +{ + struct net_device *dev = static_dev; + struct bgtornet_dev *bgtornet = netdev_priv(dev); + + +/* dumpmem(skb->data,skb->len,"Frame delivered via torus") ; */ + + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); +/* skb->pkt_type = PACKET_HOST ; */ + if( k_trust_torus_crc) skb->ip_summed = CHECKSUM_PARTIAL ; + +#if defined(TORNIC_DEV_POLL) + TRACEN(k_t_napi,"netif_receive_skb(skb=%p)",skb) ; + netif_receive_skb(skb) ; +#else + TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; + netif_rx(skb); +#endif + + dev->last_rx = jiffies; + bgtornet->stats.rx_packets++; + bgtornet->stats.rx_bytes += skb->len; +} + +int bgtornet_receive_torus(struct sk_buff *skb) +{ + + TRACE("bgtornet rcvd pkt: data=%p, len=%d", + skb->data, skb->len); + + if( k_asf_frame_verifier ) + { + if (frame_passes_verification(skb)) + { + deliver_frame(skb) ; + } + else + { + dev_kfree_skb(skb) ; + } + } + else + { + deliver_frame(skb) ; + } + + TRACE("(<)"); + return 0; +} + + +static void inject_verifier(struct sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)skb->data; + unsigned int eth_proto = eth->h_proto ; + struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ; + unsigned int iphlen = 4*iph->ihl ; + struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) ); + unsigned int ip_proto = iph->protocol ; + if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP ) + { + unsigned int tcphlen = 4*tcph->doff ; + char * payload = ((char *)(tcph)) + (tcphlen) ; + unsigned int payload_len=iph->tot_len-iphlen-tcphlen ; + unsigned int framecheck = asf_frame_verifier(payload,payload_len) ; + tcph->check = framecheck ; + TRACEN(k_t_general,"framecheck set to 0x%08x",framecheck) ; + } + +} + +static int bgtornet_start_xmit(struct sk_buff *skb, struct net_device 
*dev) +{ +#if defined(CONFIG_BLUEGENE_TCP) + struct ethhdr *eth = (struct ethhdr *)skb->data; + struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ; + struct bgtornet_dev *bgtornet = netdev_priv(dev); + unsigned int h_proto = eth->h_proto ; + unsigned int daddr = iph->daddr ; + TRACEN(k_t_general,"(>) skb=%p skb->sk=%p h_dest[%02x:%02x:%02x:%02x:%02x:%02x] daddr=0x%08x", skb, skb->sk, + eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5], daddr) ; + if( eth->h_dest[0] == 0x00 && eth->h_dest[1] == 0x80 && eth->h_dest[2] == 0x47) + { + + if( h_proto == ETH_P_IP && (daddr >> 24) == 12) + { + eth->h_dest[3]=(daddr >> 16) & 0xff ; + eth->h_dest[4]=(daddr >> 8) & 0xff ; + eth->h_dest[5]=(daddr& 0xff) - 1 ; + } + + if( eth->h_dest[3] == personality.Network_Config.Xcoord + && eth->h_dest[4] == personality.Network_Config.Ycoord + && eth->h_dest[5] == personality.Network_Config.Zcoord + ) + { + netif_rx(skb) ; /* Try to feed the skb to the local networking layer */ + } + else + { + if( k_asf_frame_verifier ) inject_verifier(skb) ; + bgtorus_start_xmit(skb, dev +/* , eth->h_dest[3],eth->h_dest[4],eth->h_dest[5] */ + ) ; + } + bgtornet->stats.tx_packets += 1 ; + bgtornet->stats.tx_bytes += skb->len ; + } + else + { + /* Request to send a frame over the torus, but not to a torus MAC address. Trace and discard. 
*/ + TRACEN(k_t_protocol,"skb=%p skb->sk=%p h_dest[%02x:%02x:%02x:%02x:%02x:%02x] not torus-mac", skb, skb->sk, + eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ; +/* bgtornet->stats.tx_errors += 1; */ +/* bgtornet->stats.tx_aborted_errors += 1; */ + dev_kfree_skb(skb) ; + + } + TRACEN(k_t_general,"(<)") ; +#else + col_start_xmit(skb, dev) ; +#endif + return 0 ; +} + +static int bgtornet_poll(struct napi_struct * napi, int budget) + { + struct net_device *dev = napi->dev ; + struct bgtornet_dev *bgtornet = netdev_priv(dev); + TRACEN(k_t_general,"(>) bgtornet_poll napi=%p dev=%p budget=%d", napi, dev, budget) ; + TRACEN(k_t_napi,"napi polling starts") ; + dma_tcp_poll_handler() ; + TRACEN(k_t_napi,"netif_rx_complete(dev=%p,napi=%p)",dev,&bgtornet->napi) ; + napi_complete(&bgtornet->napi); + dma_tcp_rx_enable() ; + TRACEN(k_t_general,"(<) bgtornet_poll dev=%p", dev) ; + return 0 ; + } + +static void bgtornet_uninit(struct net_device *dev) +{ + struct bgtornet_dev *bgtornet = netdev_priv(dev); + BUG_ON(bgtornet->lnk.private != dev); + +} + +static struct net_device_stats *bgtornet_get_stats(struct net_device *dev) +{ + struct bgtornet_dev *bgtornet = netdev_priv(dev); + return &bgtornet->stats; +} + + +static int bgtornet_init (struct net_device *dev) +{ + struct bgtornet_dev *bgtornet = netdev_priv(dev); + + bgtornet = netdev_priv(dev); + + + + /* register with tree */ + bgtornet->lnk.lnk_proto = bgtornet->tor_protocol; + bgtornet->lnk.rcv = bgtornet_receive; + bgtornet->lnk.private = dev; + + + + return 0; +} + +void bgtornet_set_arp_table_entry(unsigned int x, unsigned int y, unsigned int z, unsigned int ip_address) + { + struct net_device *dev = static_dev ; + __be32 ip = ip_address ; + struct neighbour * neigh = neigh_create(&arp_tbl, &ip, dev); + if (neigh) { + u8 lladdr[6] ; + lladdr[0] = 0x00 ; + lladdr[1] = 0x80 ; + lladdr[2] = 0x47 ; + lladdr[3] = x ; + lladdr[4] = y ; + lladdr[5] = z ; + neigh_update(neigh, 
lladdr, NUD_PERMANENT, NEIGH_UPDATE_F_OVERRIDE); + neigh_release(neigh); + } + } + +#if defined(HAVE_NET_DEVICE_OPS) +static const struct net_device_ops netdev_ops = { + .ndo_change_mtu = bgtornet_change_mtu , + .ndo_get_stats = bgtornet_get_stats , + .ndo_start_xmit = bgtornet_start_xmit , + .ndo_init = bgtornet_init , + .ndo_uninit = bgtornet_uninit , +}; +#endif + +static unsigned int dummy_features ; + +static struct ctl_table bgp_tornic_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "features", + .data = &dummy_features, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { 0 }, +} ; +static struct ctl_path tornic_ctl_path[] = { + { .procname = "bgp", .ctl_name = 0, }, + { .procname = "torusdev", .ctl_name = 0, }, + { }, +}; + + +int __init +bgtornet_module_init (void) +{ + + struct bgtornet_dev *bgtornet; + struct net_device *dev; + printk (KERN_INFO "%s\n", version); + + bluegene_getPersonality( &personality, sizeof(personality) ); + + dev = alloc_etherdev(sizeof(struct bgtornet_dev)); + if (!dev) + return -ENOMEM; + + static_dev = dev ; + + + bgtornet = netdev_priv(dev); + memset(bgtornet, 0, sizeof(*bgtornet)); + /* The following probably need to be configurable */ + + bgtornet->phandle_torus = 0; + bgtornet->eth_mask = 0; + dev->dev_addr[0] = 0x00; + dev->dev_addr[1] = 0x80; + dev->dev_addr[2] = 0x47; + dev->dev_addr[3] = personality.Network_Config.Xcoord ; + dev->dev_addr[4] = personality.Network_Config.Ycoord ; + dev->dev_addr[5] = personality.Network_Config.Zcoord ; + + bgtornet->eth_local = bgtornet->eth_mask & *(unsigned int *)&dev->dev_addr[0]; + +#if defined(HAVE_NET_DEVICE_OPS) + dev->netdev_ops = &netdev_ops ; +#else + dev->init = bgtornet_init; + dev->uninit = bgtornet_uninit; + dev->get_stats = bgtornet_get_stats; + dev->hard_start_xmit = bgtornet_start_xmit; + dev->change_mtu = bgtornet_change_mtu; +#endif + dev->mtu = BGTORNET_DEFAULT_MTU; + + + 
TRACEN(k_t_napi,"netif_napi_add(dev=%p,napi=%p,poll=bgtornet_poll,weight=16)",dev,&bgtornet->napi) ; + netif_napi_add(dev,&bgtornet->napi,bgtornet_poll,16) ; + TRACEN(k_t_napi,"napi poll_list=(%p,%p) state=%lu weight=%d poll=%p dev=%p dev_list=(%p,%p)", + bgtornet->napi.poll_list.next,bgtornet->napi.poll_list.prev, + bgtornet->napi.state,bgtornet->napi.weight,bgtornet->napi.poll, + bgtornet->napi.dev, + bgtornet->napi.dev_list.next,bgtornet->napi.dev_list.prev ) ; + TRACEN(k_t_napi,"napi_enable(napi=%p)",&bgtornet->napi) ; + napi_enable(&bgtornet->napi) ; + TRACEN(k_t_napi,"napi poll_list=(%p,%p) state=%lu weight=%d poll=%p dev=%p dev_list=(%p,%p)", + bgtornet->napi.poll_list.next,bgtornet->napi.poll_list.prev, + bgtornet->napi.state,bgtornet->napi.weight,bgtornet->napi.poll, + bgtornet->napi.dev, + bgtornet->napi.dev_list.next,bgtornet->napi.dev_list.prev ) ; + + +/* If we're trusting the torus hardware, there is no point forming an IP checksum on the send side */ + dev->features = NETIF_F_HIGHDMA + | (k_trust_torus_crc ? (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : 0 ) + | (k_inhibit_scattergather ? 0 : NETIF_F_SG) ; + + skb_queue_head_init(&(bgtornet->xmit_list)) ; + + + if (register_netdev(dev) != 0) + goto err; + if( k_inhibit_gso ) + { + dev->features &= ~(NETIF_F_GSO) ; // scatter-gather sometimes does not get it right. 
Might be a problem with GSO or might be broken anyway + /* TODO: Isolate whether GSO is broken or whether the torus driver is broken */ + } + + bgp_tornic_table[0].data = &(dev->features) ; + + register_sysctl_paths(tornic_ctl_path,bgp_tornic_table) ; + + printk(KERN_INFO + "%s: BGNET %s, MAC %02x:%02x:%02x:%02x:%02x:%02x\n" "BGTORNET mask 0x%08x local 0x%08x\n", + dev->name, "np->full_name", + dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2], + dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5], + bgtornet->eth_mask, bgtornet->eth_local + ); + + return 0; + + err: + free_netdev(dev); + return -1; + + + return 0; +} + +void __exit bgtornet_module_exit (void) +{ +} + +/* module_init(bgtornet_module_init); */ +/* module_exit(bgtornet_module_exit); */ diff --git a/drivers/net/bgp_torus/bgtornic.h b/drivers/net/bgp_torus/bgtornic.h new file mode 100644 index 00000000000000..2139081efd85cd --- /dev/null +++ b/drivers/net/bgp_torus/bgtornic.h @@ -0,0 +1,126 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ * + * Authors: Volkmar uhlig + * Chris Ward <tjcw@uk.ibm.com> + * + * Description: definitions for BG networks + * + * + ********************************************************************/ + +#ifndef __DRIVERS__NET__BLUEGENE__BGNIC_H__ +#define __DRIVERS__NET__BLUEGENE__BGNIC_H__ + +/* #define BG_IRQ(group, irq) ((group) << 5 | (irq)) */ + + +/********************************************************************** + * link layer + **********************************************************************/ + +/* #define BGNET_P_ETH0 1 */ +/* #define BGNET_P_ETH1 2 */ +/* #define BGNET_P_ETH2 3 */ +/* #define BGNET_P_ETH3 4 */ +/* #define BGNET_P_ETH4 5 */ +/* #define BGNET_P_ETH5 6 */ +/* #define BGNET_P_ETH6 7 */ +/* #define BGNET_P_ETH7 8 */ +/* #define BGNET_P_ETH8 9 */ +/* #define BGNET_P_LAST_ETH BGNET_P_ETH8 */ +/* */ +/* #define BGNET_P_CONSOLE 20 */ + +/* #define BGNET_FRAG_MTU 240 */ +/* When running 'dma_tcp_frames', we can have an MTU as large as we like. IP limits to 64k, though. */ +enum { + BGTORNET_DEFAULT_MTU = ETH_DATA_LEN , + BGTORNET_MAX_MTU = 65536 +}; +#define BGNET_MAX_MTU 65536 +/* #define BGNET_MAX_MTU (BGNET_FRAG_MTU * 128) */ +/* #define BGNET_DEFAULT_MTU (BGNET_FRAG_MTU * 30 - 12) */ +/* #define BGNET_DEFAULT_MTU ETH_DATA_LEN */ + +enum { + k_trust_torus_crc = +#if defined(BGP_TORUS_IP_CHECKSUM) + 0 +#else + 1 +#endif + , +/* #if defined(CONFIG_BGP_TORUS_ADAPTIVE_ROUTING) */ +// k_trust_torus_crc = 1 , /* Whether the IP layer should trust the BGP hardware CRC on the torus network */ +/* #else */ +// k_trust_torus_crc = 1 , /* Whether the IP layer should trust the BGP hardware CRC on the torus network */ +/* #endif */ + k_asf_frame_verifier = 0 /* Whether to try a frame verifier in the bgtornic layer */ +}; + + +struct bglink_hdr +{ + unsigned int dst_key; + unsigned int src_key; + unsigned short conn_id; + unsigned char this_pkt; + unsigned char total_pkt; + unsigned short lnk_proto; /* 1 eth, 2 con, 3... 
*/ + union { + unsigned short optional; /* for encapsulated protocol use */ + struct { + u8 pad_head; + u8 pad_tail; + } opt_eth; + }; +} __attribute__((packed)); + + +struct bglink_proto +{ + unsigned short lnk_proto; + int (*rcv)(struct sk_buff*, struct bglink_hdr*, struct bglink_proto*); + void *private; + struct list_head list; +}; + +struct bgtornet_dev +{ + unsigned short tor_protocol; + unsigned int eth_mask; + unsigned int eth_local; + struct bglink_proto lnk; + struct net_device_stats stats; + u32 phandle_torus; + struct napi_struct napi ; /* 2.6.27-ism for NAPI poll */ + struct sk_buff_head xmit_list ; /* List of skb's to be sent */ +}; + +extern inline unsigned int eth_to_key(char *addr) +{ + unsigned int key; + if (is_broadcast_ether_addr(addr)) + key = ~0U; + else + key = (addr[3] << 16) | (addr[4] << 8) | (addr[5] << 0); + return key; +} + + +#endif /* !__DRIVERS__NET__BLUEGENE__BGNIC_H__ */ diff --git a/drivers/net/bgp_torus/torus.c b/drivers/net/bgp_torus/torus.c new file mode 100644 index 00000000000000..884606fa7dd026 --- /dev/null +++ b/drivers/net/bgp_torus/torus.c @@ -0,0 +1,548 @@ +/********************************************************************* + * + * (C) Copyright IBM Corp. 2007,2010 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses>. 
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ *          Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for torus
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/KernelFxLog.h>
+
+#include <net/arp.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <linux/irq.h>
+#ifdef CONFIG_PPC_MERGE
+#include <asm/prom.h>
+#include <asm/of_platform.h>
+#endif
+
+#include <asm/bgp_personality.h>
+#include <asm/bluegene.h>
+
+
+/* #include "bgnic.h" */
+/* #include "bgcol.h" */
+
+#define TORUS_DEV_NAME "bgtorus"
+#include "../bgp_network/bgp_net_traceflags.h"
+
+int __init
+bgtornet_module_init(void) ;
+int __init
+bgtornet_module_exit(void) ;
+int __exit
+dma_tcp_module_init(void) ;
+int __exit
+dma_tcp_module_cleanup(void) ;
+
+typedef struct {
+	struct sk_buff_head skb_list_xmit ;  /* List of skb's being passed to the tasklet for sending */
+} bg_tcptorus ;
+
+static bg_tcptorus static_torus ;
+
+typedef struct {
+	unsigned char x ;
+	unsigned char y ;
+	unsigned char z ;
+} torusTarget_t ;
+
+/* #define CONFIG_BLUEGENE_TORUS_TRACE */
+
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+/* int bgtorus_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+/* int bgtorus_debug_tracemask=k_t_protocol; */
+int bgtorus_debug_tracemask=k_t_init ;
+#endif
+
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...)
KernelFxLog(1,x) +#define TRACE1(x...) KernelFxLog(1,x) +#define TRACE2(x...) KernelFxLog(1,x) +#define TRACEN(i,x...) KernelFxLog(1,x) +#define TRACED(x...) KernelFxLog(1,x) +#define TRACES(x...) KernelFxLog(1,x) +#elif defined(CONFIG_BLUEGENE_TORUS_TRACE) +#define TRACE(x...) KernelFxLog(bgtorus_debug_tracemask & k_t_general,x) +#define TRACE1(x...) KernelFxLog(bgtorus_debug_tracemask & k_t_lowvol,x) +#define TRACE2(x...) KernelFxLog(bgtorus_debug_tracemask & k_t_detail,x) +#define TRACEN(i,x...) KernelFxLog(bgtorus_debug_tracemask & (i),x) +#define TRACED(x...) KernelFxLog(1,x) +#define TRACES(x...) KernelFxLog(1,x) +#else +#define TRACE(x...) +#define TRACE1(x...) +#define TRACE2(x...) +#define TRACEN(i,x...) +#define TRACED(x...) +#define TRACES(x...) +#endif + +/* #define HAS_HOSTS */ +/* #define HAS_NICPARM */ +/* #define HAS_DRIVERPARM */ +#define HAS_TORUSDIAG + +/* If you need settable parameters for the tree or the NIC (for debugging), enable them here */ +#if defined(HAS_DRIVERPARM) +static int bgtorus_driverparm ; +#endif + +#if defined(HAS_NICPARM) +extern int bgnic_driverparm ; +#endif + +/* void torus_learn_host(const char *cp) ; */ + +int bgp_dma_ethem ; /* Set externally if we want to try 'eth-em' on torus */ + +/* #define SENDS_WITH_TASKLET */ + +#define BGP_COL_MAJOR_NUM 120 +#define BGP_TORUS_MAJOR_NUM 121 +#define BGP_GI_MAJOR_NUM 122 +#define BGP_COL_MINOR_NUMS 2 +#define BGP_TORUS_MINOR_NUMS 2 +#define BGP_GI_MINOR_NUMS 4 +#define _BGP_UA_COL0 (0x6) +#define _BGP_PA_COL0 (0x10000000) +#define _BGP_UA_COL1 (0x6) +#define _BGP_PA_COL1 (0x11000000) +#define _BGP_UA_TORUS0 (0x6) +#define _BGP_PA_TORUS0 (0x01140000) +#define _BGP_UA_TORUS1 (0x6) +#define _BGP_PA_TORUS1 (0x01150000) + +/* + * device management + */ +struct bgpnet_dev +{ + int major,minor; /* device major, minor */ + unsigned long long physaddr; /* physical address */ + struct task_struct* current; /* process holding device */ + int signum; /* signal to send holding process */ + 
wait_queue_head_t read_wq; + int read_complete; + void *regs; /* mapped regs (only used with col) */ + struct semaphore sem; /* interruptible semaphore */ + struct cdev cdev; /* container device? */ +}; + + +#define BGP_MAX_DEVICES 8 +static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES]; +static unsigned int bgpnet_num_devices = 0; + + +static int bgtorus_mappable_module_init(void) ; + +static int bgpnet_add_device(int major, int minor, const char* name, + unsigned long long base, int irq, + irqreturn_t (*irq_handler)(int, void*)); +static int bgpnet_device_open(struct inode *inode, struct file *filp); +static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *); +static int bgpnet_device_release(struct inode *inode, struct file * filp); +static int bgpnet_device_ioctl(struct inode *inode, struct file * filp, + unsigned int cmd, unsigned long arg); + + +static struct file_operations bgpnet_device_fops = +{ + .owner= THIS_MODULE, + .open= bgpnet_device_open, + .read= NULL, + .write= NULL, + .poll= NULL, + .ioctl= bgpnet_device_ioctl, + .release= bgpnet_device_release, + .mmap= bgpnet_device_mmap, +}; + + + +#if defined(HAS_TORUSDIAG) +void torus_diag(int param) ; /* So we can drive a function in the torus layer to poke at things */ +#endif + +void bgp_dma_tcp_send_and_free( struct sk_buff *skb ) ; + +void bgp_dma_tcp_poll(void) ; + + +int col_start_xmit(struct sk_buff *skb, struct net_device *dev) ; +/* We have a frame which should be routable via the torus. */ +/* For code path checkout, try it via the tree ... 
*/ +int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev +/* , unsigned int x, unsigned int y, unsigned int z */ + ) +{ +/* int ethem = bgp_dma_ethem ; */ +/* TRACEN(k_t_general,"(>) %s:%d", __func__, __LINE__) ; */ +/* if( 0 == ethem ) */ +/* { */ +/* col_start_xmit(skb, dev) ; */ +/* } */ +/* else */ +/* { */ +/* struct inet_connection_sock *icskp = inet_csk(skb->sk) ; */ +/* if( ethem & 4) */ +/* { */ +/* // Feature for duplicating the frame over the tree, so we can take the torus 'through the motions' */ +/* // as we bring up various drivers */ +/* struct sk_buff *cloneskb = skb_clone(skb, GFP_ATOMIC) ; */ +/* if( cloneskb) */ +/* { */ +/* col_start_xmit(cloneskb, dev) ; */ +/* } */ +/* } */ +/* #if defined(CONFIG_BLUEGENE_TCP) */ +/* if( 1 ) */ +/* { */ + bgp_dma_tcp_send_and_free(skb +/* ,x,y,z */ + ) ; +/* */ +/* } */ +/* else */ +/* { */ +/* col_start_xmit(skb, dev) ; */ +/* } */ +/* #else */ +/* col_start_xmit(skb, dev) ; */ +/* #endif */ +/* } */ + TRACEN(k_t_general,"(<) %s:%d", __func__, __LINE__) ; + return 0 ; +} + +static int bgtorus_proc_read (char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int remaining = count; + *eof = 1; + + return count-remaining ; +} + +#if defined(CONFIG_BLUEGENE_TORUS_TRACE) || defined(HAS_DRIVERPARM) || defined(HAS_NICPARM) || defined(HAS_TORUSDIAG) +static unsigned char xtable[256] = + { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0x0a, 
0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; + +static int bgtorus_atoix(const unsigned char *cp) + { + int result = 0 ; + unsigned char ecp = xtable[*cp] ; + while (ecp < 0x10) + { + result = (result << 4 ) | ecp ; + cp += 1 ; + ecp = xtable[*cp] ; + } + return result ; + } +#endif + +static int bgtorus_proc_write(struct file *filp, const char __user *buff, unsigned long len, void *data) + { + char proc_write_buffer[256] ; + unsigned long actual_len=(len<255) ? 
len : 255 ; + int rc = copy_from_user( proc_write_buffer, buff, actual_len ) ; + if( rc != 0 ) return -EFAULT ; + proc_write_buffer[actual_len] = 0 ; +#if defined(HAS_DRIVERPARM) + if( 0 == strncmp(proc_write_buffer,"driverparm=",11)) + { + bgtorus_driverparm=bgtorus_atoix(proc_write_buffer+11) ; + } +#endif +#if defined(HAS_NICPARM) + if( 0 == strncmp(proc_write_buffer,"nicparm=",8)) + { + bgnic_driverparm=bgtorus_atoix(proc_write_buffer+8) ; + } +#endif +#if defined(CONFIG_BLUEGENE_TORUS_TRACE) + if ( 0 == strncmp(proc_write_buffer,"tracemask=",10) ) + { + bgtorus_debug_tracemask = bgtorus_atoix(proc_write_buffer+10) ; + } +#endif +#if defined(HAS_TORUSDIAG) + if ( 0 == strncmp(proc_write_buffer,"torusdiag=",10) ) + { + int diag_opcode = bgtorus_atoix(proc_write_buffer+10) ; + torus_diag(diag_opcode) ; + } +#endif + + return actual_len ; + } + +#if defined(TCP_TORUS_AVAILABLE) +extern BGP_Personality_t tcp_bgp_personality; +#endif + + +static int __init +torus_module_init (void) +{ + struct proc_dir_entry *ent; + TRACEN(k_t_init,"torus_module_init") ; + /* ----------------------------------------------------- */ + /* create /proc entry */ + /* ----------------------------------------------------- */ + printk(KERN_INFO "%s:%d create proc ent \n", __func__, __LINE__); + ent = create_proc_entry("driver/" TORUS_DEV_NAME, S_IRUGO, NULL); + if (ent) + { + ent->nlink = 1; + ent->read_proc = (void *)bgtorus_proc_read; + ent->write_proc = (void *)bgtorus_proc_write; + } +#if defined(TCP_TORUS_AVAILABLE) + bluegene_getPersonality( &tcp_bgp_personality, sizeof(tcp_bgp_personality) ); + printk(KERN_NOTICE "Network_Config.Rank=%08x Network_Config.IOnodeRank=%08x\n", + tcp_bgp_personality.Network_Config.Rank, + tcp_bgp_personality.Network_Config.IOnodeRank + ) ; +#endif + skb_queue_head_init(&static_torus.skb_list_xmit) ; + /* Bring up the memory-mappable version */ + bgtorus_mappable_module_init() ; + /* NIC and IP driver initialisation */ + bgtornet_module_init() ; + 
	dma_tcp_module_init() ;
+	return 0 ;
+}
+
+static void __exit
+torus_module_exit (void)
+{
+	TRACEN(k_t_init,"torus_module_exit") ;
+	bgtornet_module_exit() ;
+/* 	dma_tcp_module_cleanup() ; */
+}
+/* Code grabbed from Rch's driver so that we can map the torus for user-space access */
+
+
+static int bgpnet_add_device(int major,
+			     int minor,
+			     const char* devname,
+			     unsigned long long physaddr,
+			     int irq,
+			     irqreturn_t (*irq_handler)(int, void *))
+{
+    int ret;
+    dev_t devno;
+    struct bgpnet_dev* dev = &bgpnet_devices[bgpnet_num_devices];
+    TRACEN(k_t_init,"bgpnet_add_device devname=%s",devname) ;
+    /* initialize struct */
+    init_MUTEX (&dev->sem);
+    dev->major = major;
+    dev->minor = minor;
+    dev->physaddr = physaddr;
+    init_waitqueue_head(&dev->read_wq);
+    dev->read_complete = 0;
+    if (physaddr) {
+	dev->regs = ioremap(physaddr, 4096);
+    }
+    devno=MKDEV(major,minor);
+
+    /* register i.e., /proc/devices */
+    ret=register_chrdev_region(devno,1,(char *)devname);
+
+    if (ret)
+    {
+	printk (KERN_WARNING "bgpnet: couldn't register device (%d,%d) register_chrdev_region err=%d\n",
+		major,minor,ret);
+	return ret;
+    }
+
+    /* add cdev */
+    cdev_init(&dev->cdev,&bgpnet_device_fops);
+    dev->cdev.owner=THIS_MODULE;
+    dev->cdev.ops=&bgpnet_device_fops;
+    ret=cdev_add(&dev->cdev,devno,1);
+    if (ret)
+    {
+	printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d) cdev_add err=%d\n",
+	       major,minor,ret);
+	return ret;
+    }
+
+    /* signal to pass to owning process, should be altered using ioctl */
+    dev->signum=-1;
+
+    bgpnet_num_devices++;
+
+    return 0;
+}
+
+
+static int bgpnet_device_open (struct inode *inode, struct file *filp)
+{
+    struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev);
+
+    if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+    up(&dev->sem);
+
+    dev->current=current;
+    filp->private_data = (void*) dev;
+
+    TRACE("bgpnet: device (%d,%d) opened by process \"%s\" pid %i",
+	  MAJOR(inode->i_rdev), MINOR(inode->i_rdev), current->comm,
current->pid); + + return 0; +} + + + + +static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma) +{ + unsigned long vsize = vma->vm_end - vma->vm_start; + struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data; + int ret = -1; + + /* ------------------------------------------------------- */ + /* set up page protection. */ + /* ------------------------------------------------------- */ + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO; + vma->vm_flags |= VM_RESERVED; + + /* ------------------------------------------------------- */ + /* do the mapping */ + /* ------------------------------------------------------- */ + + if (device->physaddr != 0) + ret = remap_pfn_range(vma, + vma->vm_start, + device->physaddr >> PAGE_SHIFT, + vsize, + vma->vm_page_prot); + + if (ret) { + printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n", + device->major, device->minor); + } else { + TRACE("bgpnet: mapped (%d,%d) to vm=%lx", + device->major, device->minor, vma->vm_start); + } + return ret? 
-EAGAIN :0; +} + +/* ************************************************************************* */ +/* BG/P network: release device */ +/* ************************************************************************* */ + +static int bgpnet_device_release (struct inode *inode, struct file * filp) +{ + struct bgpnet_dev *dev=(struct bgpnet_dev *)filp->private_data; + + /*Ensure exclusive access*/ + if(down_interruptible(&dev->sem)) return -ERESTARTSYS; + + dev->current = NULL; + up(&dev->sem); + + TRACE("bgpnet: device (%d,%d) successfully released", + MAJOR(inode->i_rdev), MINOR(inode->i_rdev)); + return 0; +} + + +static int bgpnet_device_ioctl (struct inode *inode, + struct file * filp, + unsigned int cmd, + unsigned long arg) +{ + return 0; +} + +static int bgtorus_mappable_module_init(void) +{ +/* unsigned long long tr0, tr1; */ + unsigned long long ts0, ts1; + + TRACEN(k_t_init,"bgtorus_mappable_module_init") ; + +/* tr0=((unsigned long long)_BGP_UA_COL0<<32) + _BGP_PA_COL0; */ +/* tr1=((unsigned long long)_BGP_UA_COL1<<32) + _BGP_PA_COL1; */ + ts0=((unsigned long long)_BGP_UA_TORUS0<<32) + _BGP_PA_TORUS0; + ts1=((unsigned long long)_BGP_UA_TORUS1<<32) + _BGP_PA_TORUS1; + + bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 0, "bgptorus_g0", ts0, -1, NULL); + bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 1, "bgptorus_g1", ts1, -1, NULL); + + mb(); + + return 0; + +} + + +/* module_init(bgtorus_mappable_module_init); */ + +module_init(torus_module_init); +module_exit(torus_module_exit); diff --git a/fs/Kconfig b/fs/Kconfig index 93945dd0b1aed9..47927a4421a7a7 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -170,7 +170,7 @@ config TMPFS_POSIX_ACL config HUGETLBFS bool "HugeTLB file system support" - depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \ + depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BLUEGENE || \ (S390 && 64BIT) || BROKEN help hugetlbfs is a filesystem backing for HugeTLB pages, based on diff --git a/include/linux/KernelFxLog.h 
b/include/linux/KernelFxLog.h new file mode 100644 index 00000000000000..3766013d9adc27 --- /dev/null +++ b/include/linux/KernelFxLog.h @@ -0,0 +1,35 @@ +#ifndef __KernelFxLogger_h__ +#define __KernelFxLogger_h__ + +static const char * FindShortPathName(const char *PN, unsigned int length) __attribute__ ((unused)) ; +static const char * FindShortPathName(const char *PN, unsigned int length) + { + int slashcount = 0; + int i; + for( i = length-1; i >= 0 ; i-- ) + { + if( PN[i] == '/' ) + { + slashcount++; + if( slashcount == 3 ) + break; + } + } + return PN+i ; + } + + +#define KernelFxLog(dbgcat, fmt, args...) \ + do { \ + if(dbgcat) \ + { \ + static const char filename[] = __FILE__ ; \ + printk(KERN_INFO " %5d %1X ..%20s %4d %30s() " fmt "\n", \ + current->pid, \ + current_thread_info()->cpu, \ + FindShortPathName(filename,sizeof(filename)), __LINE__, __FUNCTION__, ## args); \ + } \ + } while (0) + + +#endif diff --git a/include/linux/alignment_histograms.h b/include/linux/alignment_histograms.h new file mode 100644 index 00000000000000..484d1d62fd5e30 --- /dev/null +++ b/include/linux/alignment_histograms.h @@ -0,0 +1,38 @@ +#ifndef _LINUX_ALIGNMENT_HISTOGRAM_H +#define _LINUX_ALIGNMENT_HISTOGRAM_H + +#include <linux/autoconf.h> + +#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM) + +enum { + k_histogram_size=16 +}; +struct alignment_histogram { + int src_alignment_histogram_crc[k_histogram_size] ; + int dst_alignment_histogram_crc[k_histogram_size] ; + int rel_alignment_histogram_crc[k_histogram_size] ; + int src_alignment_histogram_copy[k_histogram_size] ; + int dst_alignment_histogram_copy[k_histogram_size] ; + int rel_alignment_histogram_copy[k_histogram_size] ; + int tagged[k_histogram_size] ; + long long int qcopybytes ; + long long int copybytes ; + long long int copybytesshort ; + long long int copybytesmisalign ; + long long int copybytesbroke ; + long long int crcbytes ; + long long int csumpartialbytes ; + int min_size_of_interest ; +}; +extern struct 
alignment_histogram al_histogram ; + +#define INC_AL_HISTOGRAM(Name,Address,Size) \ + { if((Size) >= al_histogram.min_size_of_interest) { al_histogram.Name[(Address)&(k_histogram_size-1)] += 1 ; } } +#define AL_HISTOGRAM(Name,Index) (al_histogram.Name[(Index)&(k_histogram_size-1)]) +#else +#define INC_AL_HISTOGRAM(Name,Address,Size) +#define AL_HISTOGRAM(Name,Index) 0 +#endif + +#endif diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 9f315382610b57..a72bcaeefa77e0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -646,7 +646,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); * for_each_cpu_and - iterate over every cpu in both masks * @cpu: the (optionally unsigned) integer iterator * @mask: the first cpumask pointer - * @and: the second cpumask pointer + * @andmask: the second cpumask pointer * * This saves a temporary CPU mask in many places. It is equivalent to: * struct cpumask tmp; @@ -656,9 +656,9 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); * * After the loop, cpu is >= nr_cpu_ids. 
*/ -#define for_each_cpu_and(cpu, mask, and) \ +#define for_each_cpu_and(cpu, mask, andmask) \ for ((cpu) = -1; \ - (cpu) = cpumask_next_and((cpu), (mask), (and)), \ + (cpu) = cpumask_next_and((cpu), (mask), (andmask)), \ (cpu) < nr_cpu_ids;) #endif /* SMP */ diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h index e576b848ce10f8..eeb3fb447c01d7 100644 --- a/include/linux/kmalloc_sizes.h +++ b/include/linux/kmalloc_sizes.h @@ -19,27 +19,34 @@ CACHE(32768) CACHE(65536) CACHE(131072) -#if KMALLOC_MAX_SIZE >= 262144 +#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU) CACHE(262144) #endif -#if KMALLOC_MAX_SIZE >= 524288 +#ifdef CONFIG_BGP +/* Intended for 'large' allocations of DMA buffers at boot time, because I cannot get bootmem_alloc to work */ + CACHE(262144) CACHE(524288) -#endif -#if KMALLOC_MAX_SIZE >= 1048576 CACHE(1048576) -#endif -#if KMALLOC_MAX_SIZE >= 2097152 CACHE(2097152) -#endif -#if KMALLOC_MAX_SIZE >= 4194304 CACHE(4194304) -#endif -#if KMALLOC_MAX_SIZE >= 8388608 +#if defined(CONFIG_HUGE_KMALLOC) CACHE(8388608) -#endif -#if KMALLOC_MAX_SIZE >= 16777216 CACHE(16777216) -#endif -#if KMALLOC_MAX_SIZE >= 33554432 CACHE(33554432) + CACHE(67108864) + CACHE(134217728) + CACHE(268435456) + CACHE(536870912) +#endif #endif +#ifndef CONFIG_MMU + CACHE(524288) + CACHE(1048576) +#ifdef CONFIG_LARGE_ALLOCS + CACHE(2097152) + CACHE(4194304) + CACHE(8388608) + CACHE(16777216) + CACHE(33554432) +#endif /* CONFIG_LARGE_ALLOCS */ +#endif /* CONFIG_MMU */ diff --git a/include/linux/resource.h b/include/linux/resource.h index 40fc7e62608220..8e9e437a369f83 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -62,7 +62,11 @@ struct rlimit { * GPG2 wants 64kB of mlocked memory, to make sure pass phrases * and other sensitive information are never written to disk. */ +#if defined(CONFIG_INFINIBAND) +#define MLOCK_LIMIT (10*1024*PAGE_SIZE) +#else #define MLOCK_LIMIT ((PAGE_SIZE > 64*1024) ? 
PAGE_SIZE : 64*1024) +#endif /* * Due to binary compatibility, the actual resource numbers diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9dcf956ad18ab2..3dac14d62a96d2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -124,8 +124,13 @@ struct sk_buff_head { struct sk_buff; +#if defined(CONFIG_BGP) +/* Set 'high' to give scope for ZRL 'soft Iwarp' over the BlueGene torus */ +#define MAX_SKB_FRAGS 18 +#else /* To allow 64K frame to be packed as single skb without frag_list */ #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) +#endif typedef struct skb_frag_struct skb_frag_t; diff --git a/include/linux/stddef.h b/include/linux/stddef.h index 6a40c76bdcf1a7..18b8dcf8935dac 100644 --- a/include/linux/stddef.h +++ b/include/linux/stddef.h @@ -12,10 +12,13 @@ #ifdef __KERNEL__ +/* C++ preprocessor has 'false' and 'true' as keywords, so the enum doesn't work */ +#if !defined(__cplusplus) enum { false = 0, true = 1 }; +#endif #undef offsetof #ifdef __compiler_offsetof diff --git a/include/net/tcp_hiatus.h b/include/net/tcp_hiatus.h new file mode 100644 index 00000000000000..7b61940ac0e7fc --- /dev/null +++ b/include/net/tcp_hiatus.h @@ -0,0 +1,31 @@ +#ifndef _NET_TCP_HIATUS_H +#define _NET_TCP_HIATUS_H + +/* + * Attempt to streamline TCP. 
Gather statistics on tx sleeps + */ +enum { + k_tcp_launched, /* Number of frames launched */ + k_tcp_wait_for_sndbuf, + k_tcp_wait_for_memory, + k_tcp_defer_mtu_probe, + k_tcp_defer_cwnd_quota, + k_tcp_defer_snd_wnd, + k_tcp_defer_nagle, + k_tcp_defer_should, + k_tcp_defer_fragment, + k_tcp_launch_failed, + k_tcp_hiatus_reasons +}; +#if defined(CONFIG_TCP_HIATUS_COUNTS) +extern int tcp_hiatus_counts[k_tcp_hiatus_reasons] ; +#endif + +static inline void increment_tcp_hiatus_count(int X) +{ +#if defined(CONFIG_TCP_HIATUS_COUNTS) + tcp_hiatus_counts[X] += 1 ; +#endif +} + +#endif diff --git a/kernel/printk.c b/kernel/printk.c index e3602d0755b0dd..d085a246407d64 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -49,7 +49,10 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...) /* We show everything that is MORE important than this.. */ #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */ -#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */ +/* Noisy kernel 7 */ +/* #define DEFAULT_CONSOLE_LOGLEVEL 7 */ /* anything MORE serious than KERN_DEBUG */ +/* Quiet kernel 3 */ +#define DEFAULT_CONSOLE_LOGLEVEL 3 /* KERN_ERR */ DECLARE_WAIT_QUEUE_HEAD(log_wait); @@ -698,9 +701,10 @@ asmlinkage int vprintk(const char *fmt, va_list args) t = cpu_clock(printk_cpu); nanosec_rem = do_div(t, 1000000000); - tlen = sprintf(tbuf, "[%5lu.%06lu] ", + tlen = sprintf(tbuf, "[%5lu.%06lu]:%x ", (unsigned long) t, - nanosec_rem / 1000); + nanosec_rem / 1000, + printk_cpu); for (tp = tbuf; tp < tbuf + tlen; tp++) emit_log_char(*tp); @@ -713,7 +717,10 @@ asmlinkage int vprintk(const char *fmt, va_list args) emit_log_char(*p); if (*p == '\n') + { new_text_line = 1; + if( p[1] == '\n' ) p++ ; /* Don't double-line-space */ + }; } /* diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1bcf9cd4baa08a..7e0839706cb826 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -95,7 +95,8 @@ config HEADERS_CHECK config 
DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" - depends on UNDEFINED + depends on UNDEFINED || BLUEGENE + default y if BLUEGENE # This option is on purpose disabled for now. # It will be enabled when we are down to a resonable number # of section mismatch warnings (< 10 for an allyesconfig build) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 76b148bcb0dcb9..8eaa92eeba7960 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,6 +271,7 @@ #include <net/ip.h> #include <net/netdma.h> #include <net/sock.h> +#include <net/tcp_hiatus.h> #include <asm/uaccess.h> #include <asm/ioctls.h> @@ -292,6 +293,17 @@ atomic_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); /* + * Statistics about the number of waits in TCP for various reasons + */ +#if defined(CONFIG_TCP_HIATUS_COUNTS) +int tcp_hiatus_counts[k_tcp_hiatus_reasons] ; +EXPORT_SYMBOL(tcp_hiatus_counts) ; +#endif +#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS) +int tcp_scattergather_frag_limit ; +EXPORT_SYMBOL(tcp_scattergather_frag_limit) ; +#endif +/* * Current number of TCP sockets. */ struct percpu_counter tcp_sockets_allocated; @@ -306,6 +318,7 @@ struct tcp_splice_state { unsigned int flags; }; + /* * Pressure flag: try to collapse. * Technical note: it is used by multiple contexts non atomically. @@ -640,8 +653,13 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) { struct sk_buff *skb; +#if defined(CONFIG_BLUEGENE) + /* Desire to have the TCP header quadword-aligned. */ + size = ALIGN(size, 16); +#else /* The TCP header must be at least 32-bit aligned. 
*/ size = ALIGN(size, 4); +#endif skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (skb) { @@ -710,10 +728,18 @@ new_segment: i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); +/* #if defined(CONFIG_BGP_TORUS_DIAGNOSTICS) */ +/* // Scatter-gather in torus driver not handling well if we have more than one frag */ +/* if (!can_coalesce && ((i > tcp_scattergather_frag_limit) || (i >= MAX_SKB_FRAGS))) { */ +/* tcp_mark_push(tp, skb); */ +/* goto new_segment; */ +/* } */ +/* #else */ if (!can_coalesce && i >= MAX_SKB_FRAGS) { tcp_mark_push(tp, skb); goto new_segment; } +/* #endif */ if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; @@ -753,8 +779,12 @@ new_segment: continue; wait_for_sndbuf: + + increment_tcp_hiatus_count(k_tcp_wait_for_sndbuf) ; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: + + increment_tcp_hiatus_count(k_tcp_wait_for_memory) ; if (copied) tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); @@ -999,8 +1029,10 @@ new_segment: continue; wait_for_sndbuf: + increment_tcp_hiatus_count(k_tcp_wait_for_sndbuf) ; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: + increment_tcp_hiatus_count(k_tcp_wait_for_memory) ; if (copied) tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index da2c3b8794f2b3..69e77d9e427c8c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -35,6 +35,7 @@ */ #include <net/tcp.h> +#include <net/tcp_hiatus.h> #include <linux/compiler.h> #include <linux/module.h> @@ -59,6 +60,15 @@ int sysctl_tcp_base_mss __read_mostly = 512; /* By default, RFC2861 behavior. 
*/ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +#if defined(CONFIG_TCP_CONGESTION_OVERRIDES) +int sysctl_tcp_force_nodelay ; +int sysctl_tcp_permit_cwnd ; +int sysctl_tcp_max_cwnd = 1000 ; +EXPORT_SYMBOL(sysctl_tcp_force_nodelay) ; +EXPORT_SYMBOL(sysctl_tcp_permit_cwnd) ; +EXPORT_SYMBOL(sysctl_tcp_max_cwnd) ; +#endif + static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -1145,6 +1155,11 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, in_flight = tcp_packets_in_flight(tp); cwnd = tp->snd_cwnd; +#if defined(CONFIG_TCP_CONGESTION_OVERRIDES) + cwnd = (cwnd < sysctl_tcp_permit_cwnd) + ? sysctl_tcp_permit_cwnd + : ( ( cwnd > sysctl_tcp_max_cwnd) ? sysctl_tcp_max_cwnd : cwnd ) ; +#endif if (in_flight < cwnd) return (cwnd - in_flight); @@ -1213,6 +1228,11 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) return 1; +#if defined(CONFIG_TCP_CONGESTION_OVERRIDES) + if (sysctl_tcp_force_nodelay) + return 1 ; +#endif + return 0; } @@ -1508,6 +1528,7 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } + /* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. @@ -1534,6 +1555,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, /* Do MTU probing. 
*/ result = tcp_mtu_probe(sk); if (!result) { + increment_tcp_hiatus_count(k_tcp_defer_mtu_probe) ; return 0; } else if (result > 0) { sent_pkts = 1; @@ -1548,20 +1570,32 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, cwnd_quota = tcp_cwnd_test(tp, skb); if (!cwnd_quota) + { + increment_tcp_hiatus_count(k_tcp_defer_cwnd_quota) ; break; + } if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) + { + increment_tcp_hiatus_count(k_tcp_defer_snd_wnd) ; break; + } if (tso_segs == 1) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? nonagle : TCP_NAGLE_PUSH)))) + { + increment_tcp_hiatus_count(k_tcp_defer_nagle) ; break; + } } else { if (!push_one && tcp_tso_should_defer(sk, skb)) + { + increment_tcp_hiatus_count(k_tcp_defer_should) ; break; } + } limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) @@ -1570,13 +1604,20 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (skb->len > limit && unlikely(tso_fragment(sk, skb, limit, mss_now))) + { + increment_tcp_hiatus_count(k_tcp_defer_fragment) ; break; + } TCP_SKB_CB(skb)->when = tcp_time_stamp; if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) + { + increment_tcp_hiatus_count(k_tcp_launch_failed) ; /* e.g. no memory when building TCP header */ break; + } + increment_tcp_hiatus_count(k_tcp_launched) ; /* Eventually, we didn't 'sleep' it. */ /* Advance the send_head. This one is sent out. * This call will increment packets_out. */ diff --git a/net/socket.c b/net/socket.c index 35dd7371752a94..2ed4918dc4b279 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1384,7 +1384,9 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) * necessary for a listen, and if that works, we mark the socket as * ready for listening. 
*/ - +#if defined(CONFIG_BGP_TORUS) +extern int sysctl_bgp_torus_backlog_floor ; +#endif SYSCALL_DEFINE2(listen, int, fd, int, backlog) { struct socket *sock; @@ -1396,6 +1398,10 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; if ((unsigned)backlog > somaxconn) backlog = somaxconn; +#if defined(CONFIG_BGP_TORUS) +/* Apps (particularly mpich2) sometimes set 'backlog' a long way too small for cloud computing */ + if(backlog < sysctl_bgp_torus_backlog_floor ) backlog = sysctl_bgp_torus_backlog_floor ; +#endif err = security_socket_listen(sock, backlog); if (!err) |