aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKazutomo Yoshii <kazutomo@mcs.anl.gov>2010-04-19 09:44:29 -0500
committerEric Van Hensbergen <ericvh@gmail.com>2011-02-15 15:29:49 -0600
commitbee9f329eeef6c8eb95c35de4c5d22a0c05a1b3e (patch)
tree82717e09cbc58ea3671f9940daeb93dbd5e63524
parent8d7bff2d72660d9d60aa371ae3d1356bbf329a09 (diff)
downloadbluegene-ibm-cn-2.6.29.1.tar.gz
IBM CN patchibm-cn-2.6.29.1
-rw-r--r--arch/powerpc/Kconfig7
-rw-r--r--arch/powerpc/Makefile5
-rw-r--r--arch/powerpc/boot/Makefile4
-rw-r--r--arch/powerpc/boot/bgcns.h1060
-rw-r--r--arch/powerpc/boot/bgp.c166
-rw-r--r--arch/powerpc/boot/bgp_personality.h1086
-rw-r--r--arch/powerpc/boot/dts/bgp.dts127
-rwxr-xr-xarch/powerpc/boot/wrapper5
-rw-r--r--arch/powerpc/configs/44x/bgp_defconfig929
-rw-r--r--arch/powerpc/include/asm/bgcns.h1060
-rw-r--r--arch/powerpc/include/asm/bgp_personality.h1086
-rw-r--r--arch/powerpc/include/asm/bluegene.h71
-rw-r--r--arch/powerpc/include/asm/bluegene_ras.h107
-rw-r--r--arch/powerpc/include/asm/mmu-44x.h10
-rw-r--r--arch/powerpc/include/asm/page_32.h3
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h35
-rw-r--r--arch/powerpc/include/asm/processor.h15
-rw-r--r--arch/powerpc/include/bpcore/bgp_dma_memmap.h205
-rw-r--r--arch/powerpc/include/bpcore/bgp_types.h71
-rw-r--r--arch/powerpc/include/bpcore/ic_memmap.h803
-rw-r--r--arch/powerpc/include/common/alignment.h66
-rw-r--r--arch/powerpc/include/common/bgp_bitnumbers.h113
-rw-r--r--arch/powerpc/include/common/bgp_chipversion.h52
-rw-r--r--arch/powerpc/include/common/bgp_personality.h786
-rw-r--r--arch/powerpc/include/common/namespace.h47
-rw-r--r--arch/powerpc/include/spi/DMA_Assert.h276
-rw-r--r--arch/powerpc/include/spi/DMA_Counter.h2986
-rw-r--r--arch/powerpc/include/spi/DMA_Descriptors.h1505
-rw-r--r--arch/powerpc/include/spi/DMA_Fifo.h1011
-rw-r--r--arch/powerpc/include/spi/DMA_InjFifo.h2475
-rw-r--r--arch/powerpc/include/spi/DMA_Packet.h347
-rw-r--r--arch/powerpc/include/spi/DMA_RecFifo.h1810
-rw-r--r--arch/powerpc/include/spi/bpcore_interface.h41
-rw-r--r--arch/powerpc/include/spi/kernel_interface.h1982
-rw-r--r--arch/powerpc/include/spi/linux_interface.h777
-rw-r--r--arch/powerpc/include/spi/linux_kernel_spi.h113
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/cputable.c13
-rw-r--r--arch/powerpc/kernel/fpu.S8
-rw-r--r--arch/powerpc/kernel/head_44x.S91
-rw-r--r--arch/powerpc/kernel/misc_32.S20
-rw-r--r--arch/powerpc/kernel/smp.c8
-rw-r--r--arch/powerpc/lib/Makefile1
-rw-r--r--arch/powerpc/lib/copy_32.S12
-rw-r--r--arch/powerpc/lib/copy_tofrom_user.c19
-rw-r--r--arch/powerpc/mm/44x_mmu.c8
-rw-r--r--arch/powerpc/mm/Makefile2
-rw-r--r--arch/powerpc/mm/fault.c75
-rw-r--r--arch/powerpc/mm/mmap.c2
-rw-r--r--arch/powerpc/mm/mmu_context_nohash.c24
-rw-r--r--arch/powerpc/platforms/44x/Kconfig241
-rw-r--r--arch/powerpc/platforms/44x/Makefile1
-rw-r--r--arch/powerpc/platforms/44x/bgp.c205
-rw-r--r--arch/powerpc/platforms/44x/bgp_bic.c675
-rw-r--r--arch/powerpc/platforms/44x/bgp_cns.c244
-rw-r--r--arch/powerpc/platforms/44x/bgp_pers.c345
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype6
-rw-r--r--arch/powerpc/syslib/bgdd/Makefile11
-rw-r--r--arch/powerpc/syslib/bgdd/bgp_dma_base.c1284
-rw-r--r--arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c1588
-rw-r--r--arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c206
-rw-r--r--arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c3016
-rw-r--r--drivers/char/Makefile2
-rw-r--r--drivers/char/bluegene_console.c805
-rw-r--r--drivers/char/bluegene_networks.c202
-rw-r--r--drivers/net/Kconfig177
-rw-r--r--drivers/net/Makefile7
-rw-r--r--drivers/net/bgp_collective/Makefile7
-rw-r--r--drivers/net/bgp_collective/bgcol.c3330
-rw-r--r--drivers/net/bgp_collective/bgcol.h285
-rw-r--r--drivers/net/bgp_collective/bglink.h158
-rw-r--r--drivers/net/bgp_collective/bgnet.c827
-rw-r--r--drivers/net/bgp_collective/bgnet.h152
-rw-r--r--drivers/net/bgp_collective/bgp_dcr.h1041
-rw-r--r--drivers/net/bgp_collective/ppc450.h141
-rw-r--r--drivers/net/bgp_e10000/Makefile5
-rw-r--r--drivers/net/bgp_e10000/bgp_e10000.h175
-rw-r--r--drivers/net/bgp_e10000/bgp_e10000_main.c567
-rw-r--r--drivers/net/bgp_e10000/bgp_emac.c282
-rw-r--r--drivers/net/bgp_e10000/bgp_emac.h356
-rw-r--r--drivers/net/bgp_e10000/bgp_tomal.c1892
-rw-r--r--drivers/net/bgp_e10000/bgp_tomal.h423
-rw-r--r--drivers/net/bgp_network/450_tlb.h121
-rw-r--r--drivers/net/bgp_network/bgdiagnose.h183
-rw-r--r--drivers/net/bgp_network/bgp_net_traceflags.h56
-rw-r--r--drivers/net/bgp_statistics/Makefile4
-rw-r--r--drivers/net/bgp_statistics/bgp_stats.c258
-rw-r--r--drivers/net/bgp_torus/Makefile8
-rw-r--r--drivers/net/bgp_torus/bgp_bic_diagnosis.h75
-rw-r--r--drivers/net/bgp_torus/bgp_dma_ioctl.c677
-rw-r--r--drivers/net/bgp_torus/bgp_dma_memcpy.c1321
-rw-r--r--drivers/net/bgp_torus/bgp_dma_tcp.c931
-rw-r--r--drivers/net/bgp_torus/bgp_dma_tcp.h1623
-rw-r--r--drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c707
-rw-r--r--drivers/net/bgp_torus/bgp_dma_tcp_frames.c2712
-rw-r--r--drivers/net/bgp_torus/bgp_dma_tcp_quads.h394
-rw-r--r--drivers/net/bgp_torus/bgp_fpu_memcpy.c825
-rw-r--r--drivers/net/bgp_torus/bgp_memcpy.h204
-rw-r--r--drivers/net/bgp_torus/bgtor.h310
-rw-r--r--drivers/net/bgp_torus/bgtornic.c597
-rw-r--r--drivers/net/bgp_torus/bgtornic.h126
-rw-r--r--drivers/net/bgp_torus/torus.c548
-rw-r--r--fs/Kconfig2
-rw-r--r--include/linux/KernelFxLog.h35
-rw-r--r--include/linux/alignment_histograms.h38
-rw-r--r--include/linux/cpumask.h6
-rw-r--r--include/linux/kmalloc_sizes.h35
-rw-r--r--include/linux/resource.h4
-rw-r--r--include/linux/skbuff.h5
-rw-r--r--include/linux/stddef.h3
-rw-r--r--include/net/tcp_hiatus.h31
-rw-r--r--kernel/printk.c13
-rw-r--r--lib/Kconfig.debug3
-rw-r--r--net/ipv4/tcp.c32
-rw-r--r--net/ipv4/tcp_output.c41
-rw-r--r--net/socket.c8
116 files changed, 52012 insertions, 107 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 74cc312c347cf8..9900af0e37773f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -510,6 +510,13 @@ config CMDLINE
some command-line options at build time by entering them here. In
most cases you will need to specify the root device here.
+config WRAP_COPY_TOFROM_USER
+ bool "C-language wrapper for copy to/from user"
+ default n
+ help
+ Set this if you want to instrument the low-level function which block-copies data
+ between user-space and kernel-space
+
config EXTRA_TARGETS
string "Additional default image types"
help
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 72d17f50e54fa7..fc68dca9e91d73 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -125,7 +125,9 @@ ifeq ($(CONFIG_FUNCTION_TRACER),y)
KBUILD_CFLAGS += -mno-sched-epilog
endif
-cpu-as-$(CONFIG_4xx) += -Wa,-m405
+ifndef CONFIG_BGP
+cpu-as-$(CONFIG_4xx) += -Wa,-m450
+endif
cpu-as-$(CONFIG_6xx) += -Wa,-maltivec
cpu-as-$(CONFIG_POWER4) += -Wa,-maltivec
cpu-as-$(CONFIG_E500) += -Wa,-me500
@@ -151,6 +153,7 @@ core-y += arch/powerpc/kernel/ \
core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/
core-$(CONFIG_XMON) += arch/powerpc/xmon/
core-$(CONFIG_KVM) += arch/powerpc/kvm/
+core-$(CONFIG_BGP) += arch/powerpc/syslib/bgdd/
drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index e84df338ea2987..5e09e7e0658b7d 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -39,6 +39,7 @@ DTS_FLAGS ?= -p 1024
$(obj)/4xx.o: BOOTCFLAGS += -mcpu=405
$(obj)/ebony.o: BOOTCFLAGS += -mcpu=405
+$(obj)/bgp.o: BOOTCFLAGS += -mcpu=405
$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405
$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405
$(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405
@@ -60,7 +61,7 @@ src-wlib := string.S crt0.S crtsavres.S stdio.c main.c \
gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \
4xx.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c bamboo.c \
cpm-serial.c stdlib.c mpc52xx-psc.c planetcore.c uartlite.c \
- fsl-soc.c mpc8xx.c pq2.c
+ fsl-soc.c mpc8xx.c pq2.c bgp.c
src-plat := of.c cuboot-52xx.c cuboot-824x.c cuboot-83xx.c cuboot-85xx.c holly.c \
cuboot-ebony.c treeboot-ebony.c prpmc2800.c \
ps3-head.S ps3-hvcall.S ps3.c treeboot-bamboo.c cuboot-8xx.c \
@@ -193,6 +194,7 @@ image-$(CONFIG_PPC_PSERIES) += zImage.pseries
image-$(CONFIG_PPC_MAPLE) += zImage.pseries
image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries
image-$(CONFIG_PPC_PS3) += dtbImage.ps3
+image-$(CONFIG_BGP) += dtbImage.bgp
image-$(CONFIG_PPC_CELLEB) += zImage.pseries
image-$(CONFIG_PPC_CELL_QPACE) += zImage.pseries
image-$(CONFIG_PPC_CHRP) += zImage.chrp
diff --git a/arch/powerpc/boot/bgcns.h b/arch/powerpc/boot/bgcns.h
new file mode 100644
index 00000000000000..238ad401a3cbfb
--- /dev/null
+++ b/arch/powerpc/boot/bgcns.h
@@ -0,0 +1,1060 @@
+/*
+ * (C) Copyright IBM Corp. 2007, 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Tom Gooding, IBM
+ */
+
+
+#ifndef _BGCNS_H
+#define _BGCNS_H
+
+
+#ifndef __ASSEMBLY__
+
+/*! @page CNS Common Node Services
+ *
+ * @section CNS_S10 Overview
+ *
+ * As the name implies, the <b>Common Node Services (CNS)</b> layer provides @b services
+ * to the kernel. These services may be simple queries abstracting various node
+ * specific data (such as DDR size) or they may be more sophisticated software services,
+ * such as common machine check handling. Additionally, some services may be implicit,
+ * such as the initialization of various hardware devices unique to Blue Gene, such as
+ * Netbus and SerDes.
+ *
+ * Services are not directly linked into the kernel, but rather are invoked from kernel
+ * code via a <b>service directory</b> which is itself part of an overall <b>service
+ * descriptor</b>. This service descriptor is constructed during initialization and
+ * is passed to the kernel when the kernel is booted. The service directory is a
+ * collection of <b>service references</b>.
+ *
+ * During partition (block) booting, ELF images are loaded onto the compute and I/O nodes.
+ * The bootloader (@i aka microloader) boots first and then transfers control to the Common
+ * Node Services layer so that it, in turn, may boot.
+ *
+ * Once the CNS layer has booted, control is transferred to the kernel so that it may also
+ * boot. All services provided by the CNS layer are immediately available at this time.
+ *
+ * @section CNS_S20 Programming Model
+ *
+ * A kernel running on top of the CNS layer is not statically linked to the common services.
+ * Instead, the services are called via function pointers provided by the services descriptor,
+ * which is described here: @ref _BGCNS_ServiceDirectory.
+ *
+ * The kernel must operate under the following rules and restrictions:
+ * @li The kernel must not alter the services descriptor. The descriptor must be treated as a read-only
+ * data structure even though the kernel may have the ability to alter it. Because CNS trusts the
+ * kernel, this also implies that the kernel must not expose the descriptor to any untrusted
+ * software (such as application code).
+ * @li The kernel must ensure that the CNS virtual memory region is mapped prior to invoking any
+ * service.
+ * @li The kernel must ensure that any data passed to services via parameters is mapped.
+ * Specifically, TLB entries must be mapped as shared (TID = 0) and must be either readable
+ * (input parameters) or readable and writeable (output parameters).
+ * @li The kernel must treat the virtual address range (@ref _BGCNS_Descriptor::baseVirtualAddress ,
+ * _BGCNS_Descriptor::baseVirtualAddress + @ref _BGCNS_Descriptor::size - 1) as reserved.
+ * That is, the kernel must not use this region of virtual memory for anything besides accessing
+ * the services descriptor.
+ * @li The kernel must treat the physical address range (@ref _BGCNS_Descriptor::basePhysicalAddress,
+ * _BGCNS_Descriptor::basePhysicalAddress + _BGCNS_Descriptor::size - 1) as reserved. The
+ * kernel must not map this memory for any other use.
+ * @li The kernel must not access any of the reserved virtual address regions with TLB settings that
+ * are different from those used by CNS. The kernel is allowed to unmap any of the reserved
+ * memory TLBs for its own use. However, in such a case and per the rule above, the kernel must
+ * ensure that the region is mapped prior to using any CNS facilities (such as invoking a service).
+ * @li CNS may need to map one or more TLB entries in order to access Blue Gene devices. In such a case,
+ * CNS may borrow TLB entries; the TLB will be returned to its original state before the service returns
+ * control to the invoking kernel. Kernels may avoid this behavior for specific devices by using
+ * the mapDevice service.
+ * @li The kernel's ELF image must avoid the 256K region of memory between 0x07000000 and 0x0703FFFF. This
+ * region is used for the pre-relocated CNS image and will be available for general use once CNS boot
+ * is complete.
+ * @li The kernel must not alter any reserved SPRs, DCRs or memory-mapped device registers.
+ *
+ * The CNS software may behave unpredictably if any of these rules and restrictions is violated.
+ *
+ * Kernels may make the following assumptions about CNS:
+ *
+ * @li The data passed in the firmware descriptor (see below) is static. Specifically, the base addresses,
+ * size and service directory will not change once CNS boot is complete.
+ *
+ * @subsection CNS_21 Programming Examples
+ *
+ * @subsubsection CNS_211 Obtaining the Personality
+ *
+ * The following example shows how to fetch a copy of the Blue Gene personality structure and also
+ * serves as a simple example of invoking a service:
+ *
+ * @code
+ *
+ * BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time
+ * _BGP_Personality_t* pers = (_BGP_Personality_t*)(*descr->services->getPersonalityData)();
+ * ...
+ * @endcode
+ *
+ * The programming model guarantees that the descriptor is static. Thus, one can provide a
+ * convenience method to make service invocation a little more readable
+ *
+ * @code
+ *
+ *
+ * static BGCNS_Descriptor* _cns_descriptor = ...; // obtained from CNS at boot time
+ *
+ * inline BGCNS_ServiceDirectory* cns() { return _cns_descriptor->services; }
+ *
+ * void foo() {
+ * _BGP_Personality_t* pers = (_BGP_Personality_t*)cns()->getPersonalityData();
+ * ...
+ * }
+ *
+ * @endcode
+ *
+ * This style will be used in all of the subsequent examples.
+ *
+ * @subsubsection CNS_212 SMP Initialization
+ *
+ * Common Node Services will launch the kernel on a single core (typically core 0) and will
+ * leave the remaining cores parked. The kernel can activate additional cores via the @c takeCPU
+ * service. Here is a very simple example of such kernel code:
+ *
+ * @code
+ *
+ * void anEntryPoint(unsigned core, void* arg_not_used) {
+ * // Do whatever your kernel needs to do here. Typically,
+ * // this function never returns. You will arrive here
+ * // when takeCPU is invoked (below).
+ * }
+ *
+ * void someCodeOnTheMainThread() {
+ *
+ * // ...
+ *
+ * unsigned N = cns()->getNumberOfCores();
+ *
+ * for (core = 1; core < N; core++) {
+ * if ( cns()->takeCPU(core, NULL, &anEntryPoint) != 0 ) {
+ * // error handling goes here
+ * }
+ * }
+ *
+ * // ...
+ * }
+ *
+ * @endcode
+ *
+ * @subsubsection CNS_213 Version Compatibility
+ *
+ * Common Node Services structures and APIs should remain compatible within maintenance
+ * releases and e-fixes. Kernels may add a runtime check to ensure that the version
+ * of CNS is compatible with the version from compile time. This is done as follows:
+ *
+ * @code
+ *
+ * BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time
+ *
+ * if ( ! BGCNS_IS_COMPATIBLE(descr) ) {
+ * // incompatible CNS (panic?)
+ * }
+ *
+ * @endcode
+ *
+ * @subsubsection CNS_23 Interrupts
+ *
+ * A kernel wanting to use the CNS interrupt services would first have to enable interrupts
+ * for the appropriate Blue Gene BIC group and IRQ within that group. This would likely be
+ * done at boot time. So, for example, such a kernel could enable interrupts for the Universal
+ * Performance Counter (group 5, IRQ 2) to be handled by the non-critical handler of core 0 as
+ * follows:
+ *
+ * @code
+ * cns()->enableInterrupt(5, 2, BGCNS_NonCritical, 0);
+ * @endcode
+ *
+ * Such a kernel might also maintain a collection of routines that act as subhandlers of the
+ * non-critical interrupt handler. In this example, we'll assume it is simply a two
+ * dimensional array indexed by group and IRQ:
+ *
+ * @code
+ * subhandlers[5][2] = &theUpcSubHandler;
+ * @endcode
+ *
+ * That kernel's non-critical interrupt handler would then typically handle all interrupts by
+ * successively invoking the getInterrupt() service to determine the group and IRQ, and then
+ * dispatching the appropriate subhandler. Additionally, the interrupt will be acknowledged
+ * so to avoid continuous interruption:
+ *
+ * @code
+ * unsigned g, i;
+ *
+ * while ( cns()->getInterrupt(BGCNS_NonCritical, &g, &i) == 0) {
+ * (*subhandlers[g][i])(); // dispatch the handler
+ * cns()->acknowledgeInterrupt(g,i); // ack the interrupt
+ * }
+ * @endcode
+ *
+ * @subsubsection CNS_24 Global Barriers and Interrupts
+ *
+ * The Blue Gene/P Global Interrupt Controller (aka GLINT) provides 4 independent channels
+ * that may be configured as either a global barrier or a global interrupt.
+ *
+ * Barriers are constructed by invoking the barrier service:
+ *
+ * @code
+ * unsigned channel = 0;
+ *
+ * // synchronize:
+ * int reset = 1;
+ * int rc;
+ * while ( (rc = cns()->globalBarrier_nonBlocking(channel, reset, 1000)) == BGCNS_RC_CONTINUE ) {
+ * reset = 0;
+ * }
+ *
+ * if ( rc == BGCNS_RC_COMPLETE ) {
+ * // good path
+ * }
+ * else {
+ * // error
+ * }
+ * @endcode
+ *
+ * Similarly, a barrier with a timeout can also be constructed:
+ *
+ * @code
+ * unsigned channel = 0;
+ * int reset = 1;
+ * unsigned long long startTime = ...; // obtain current time
+ * int rc;
+ *
+ * while ( (rc = cns()->globalBarrier_nonBlocking(channel,reset, 1000)) == BGCNS_RC_CONTINUE ) {
+ * reset = 0;
+ * unsigned long long currentTime = ...; // obtain current time
+ * if ( currentTime - startTime > timeout )
+ * break;
+ * }
+ *
+ * if ( rc == BGCNS_RC_COMPLETE ) {
+ * // good path
+ * }
+ * else {
+ * // timeout or error
+ * }
+ * @endcode
+ *
+ * A node may opt out of a barrier channel via the disableBarrier service:
+ *
+ * @code
+ *
+ * // some other synchronization mechanism needs to go here
+ *
+ * cns()->disableBarrier(channel);
+ *
+ * @endcode
+ *
+ * Conversely, it may opt back in:
+ *
+ * @code
+ * cns()->enableBarrier(channel, user_mode);
+ * @endcode
+ *
+ * By default, CNS reserves the use of channel 2 as a global interrupt for environmental
+ * monitoring. It also reserves channel 3 for use as a supervisory mode, compute-node
+ * only barrier. Compute node kernels are free to share this channel for the same
+ * purpose (compute node, supervisory barrier). The enable/disable barrier services
+ * may return errors if operating on a reserved channel.
+ *
+ * NOTE: The standard BG/P software stack, which includes I/O node Linux and Compute Node
+ * Kernel (CNK) uses channel 0 as an I/O node barrier during boot and transforms it to an
+ * a compute-node-only barrier when jobs execute.
+ *
+ *
+ * @section CNS_3 DMA Services
+ *
+ * The DMA services provided in CNS are low-level services. Interested readers of this area should
+ * also look at the documentation for the DMA SPIs, which are at a slightly higher level.
+ *
+ *
+ *
+ * @section CNS_4 Reserved and Preferred Addresses
+ *
+ *
+ * The following virtual memory regions are reserved and must be avoided by
+ * kernels:
+ *
+ * @code
+ *
+ * +------------+------------+------+----------------------+-----------------------+
+ * | Lower | Upper | Size | Usage | Attributes |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | CNSlow[1] | CNShigh[2] | 256K | CNS | I, Rs, Ws, Xs |
+ * +------------+------------+------+----------------------+-----------------------+
+ *
+ * [1] CNSlow = descr->baseVirtualAddress , usually 0xFFF40000
+ * [2] CNShigh = descr->baseVirtualAddress + descr->size - 1; usually 0xFFF7FFFF
+ *
+ * @endcode
+ *
+ * The following virtual memory regions are used by default in CNS. Kernels that wish to have
+ * a different memory map may do so via the mapDevice service.
+ *
+ * @code
+ * +------------+------------+------+----------------------+-----------------------+
+ * | Lower | Upper | Size | Usage | Attributes |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFB0000 | 0xFFFCFFFF | 64K | Torus | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFD0000 | 0xFFFD3FFF | 16K | DMA | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFD9000 | 0xFFFD9FFF | 4K | DevBus | I, G, Rs, Ws |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFDA000 | 0xFFFDAFFF | 4K | UPC | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFDC000 | 0xFFFDD3FF | 4K | Collective | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFDE000 | 0xFFFDEFFF | 4K | BIC | I, G, Rs, Ws, Xs |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFF0000 | 0xFFFF3FFF | 16K | Lockbox (supervisor) | I, G, Rs, Ws |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFF4000 | 0xFFFF7FFF | 16K | Lockbox (user) | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFF8000 | 0xFFFFFFFF | 32K | SRAM | SWOA, WL1, Rs, Ws, Xs |
+ * +------------+------------+------+----------------------+-----------------------+
+ * @endcode
+ *
+ */
+
+
+#define BGCNS_VERSION 0x01030000 /* V1R3M0 efix 0 */
+#define BGCNS_IS_COMPATIBLE(descr) ( ((descr)->version & 0xFFFF0000) == (BGCNS_VERSION & 0xFFFF0000) ) //!< True iff the given descriptor is compatible with this version of CNS
+
+/* ! @enum BGCNS_InterruptType */
+/* ! @brief Defines the different types of interrupts known to */
+/* ! Common Node Services. */
+typedef enum {
+ BGCNS_NonCritical, //!< Non-critical interrupt
+ BGCNS_Critical, //!< Critical interrupt
+ BGCNS_MachineCheck, //!< Machine check
+} BGCNS_InterruptType;
+
+/* ! @enum BGCNS_FifoOperation */
+/* ! @brief Defines the types of FIFO operations */
+/* ! @see _BGCNS_ServiceDirectory::setDmaFifoControls */
+/* ! @see _BGCNS_ServiceDirectory::setDmaLocalCopies */
+/* ! @see _BGCNS_ServiceDirectory::setDmaPriority */
+typedef enum {
+ BGCNS_Disable = 0,
+ BGCNS_Enable = 1,
+ BGCNS_Reenable = 2
+} BGCNS_FifoOperation;
+
+/* ! @enum BGCNS_FifoFacility */
+/* ! @brief Defines the various types of FIFO facilities */
+typedef enum {
+ BGCNS_InjectionFifo, //!< Normal Injection FIFO
+ BGCNS_ReceptionFifo, //!< Normal Reception FIFO
+ BGCNS_ReceptionHeaderFifo, //!< Reception Header FIFO (typically used only for debugging)
+ BGCNS_InjectionFifoInterrupt,
+ BGCNS_ReceptionFifoInterrupt,
+ BGCNS_ReceptionHeaderFifoInterrupt,
+ BGCNS_InjectionCounterInterrupt,
+ BGCNS_ReceptionCounterInterrupt
+} BGCNS_FifoFacility;
+
+/* ! @enum BGCNS_LinkType */
+/* ! @brief Defines the types of MAC links. */
+/* ! @see _BGCNS_ServiceDirectory::macTestLink */
+typedef enum {
+ BGCNS_Transmitter, //!< A transmitter link.
+ BGCNS_Receiver //!< A receiver link.
+} BGCNS_LinkType;
+
+/* ! @enum BGCNS_EnvmonParameter */
+/* ! @brief Enumerates the various environmental monitor parameters. */
+/* ! @see _BGCNS_ServiceDirectory::getEnvmonParm */
+/* ! @see _BGCNS_ServiceDirectory::setEnvmonParm */
+typedef enum {
+ BGCNS_envmon_period = 0,
+ BGCNS_envmon_policy,
+ BGCNS_envmon_globintwire,
+
+ /* temporary */
+ BGCNS_envmon_duration,
+ BGCNS_envmon_ddrratio,
+ BGCNS_envmon_numparms
+} BGCNS_EnvmonParameter;
+
+
+#define BGCNS_RC_COMPLETE 0 //!< Indicates that the operation completed normally.
+#define BGCNS_RC_CONTINUE 1 //!< Indicates that the operation is still in progress.
+#define BGCNS_RC_TIMEOUT -1 //!< Indicates that the operation timed out.
+#define BGCNS_RC_ERROR -2 //!< Indicates that the operation failed.
+
+#define BGCNS_NUM_DMA_RECEPTION_GROUPS 4
+#define BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP 8
+
+/* ! @brief Describes the mapping of physical torus reception FIFOs to DMA reception FIFOs (rmFIFOs). */
+/* ! The first dimension indexes DMA reception groups, which are a combination of PID0 and PID1 bits */
+/* ! from the DMA packet. */
+/* ! */
+/* ! The second dimension indexes through the different dimensions: X+, X-, Y+, Y-, Z+, Z-, high priority */
+/* ! and local copy. */
+typedef unsigned char BGCNS_ReceptionMap[BGCNS_NUM_DMA_RECEPTION_GROUPS][BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP];
+
+/* ! @brief Indicates that an interrupt is to be broadcast on all cores. */
+/* ! @see _BGCNS_ServiceDirectory::enableInterrupt */
+#define BGCNS_ALL_CORE_BROADCAST 0xFFFFFFFFu
+
+
+/* ! @enum BGCNS_DeviceMasks */
+/* ! @brief Provides a list of masks for various Blue Gene devices */
+
+typedef enum {
+ BGCNS_SRAM = 0x80000000u,
+ BGCNS_BIC = 0x40000000u,
+ BGCNS_Torus = 0x20000000u,
+ BGCNS_DevBus = 0x10000000u,
+ BGCNS_XEMAC = 0x08000000u,
+ BGCNS_LockBox = 0x04000000u,
+ BGCNS_Collective = 0x02000000u,
+ BGCNS_SRAM_Err = 0x01000000u,
+ BGCNS_DMA = 0x00800000u,
+ BGCNS_UPC = 0x00400000u
+} BGCNS_DeviceMasks;
+
+/* ! @typedef BGCNS_ServiceDirectory */
+/* ! @struct _BGCNS_ServiceDirectory */
+/* ! @brief The service directory is a collection of function pointers to services */
+/* ! provided by the Common Node Services. */
+typedef struct _BGCNS_ServiceDirectory {
+
+ /*------------------------------------------*/
+ /*--- Informational services for the node --*/
+ /*------------------------------------------*/
+
+
+ int (*isIONode)(void); //!< Returns 1 if this is an I/O node; 0 if not.
+
+
+ /*-----------------------------------------------------------------*/
+ /*--- Informational services for obtaining Raw personality data ---*/
+ /*-----------------------------------------------------------------*/
+
+ unsigned int (*getPersonalitySize)(void); //!< Returns the size (in bytes) of the Blue Gene personality.
+ void* (*getPersonalityData)(void); //!< Returns a pointer to the raw personality data.
+
+
+ /*-----------------------------------------------*/
+ /*--- Services for Symmetric Multi-Processing ---*/
+ /*-----------------------------------------------*/
+
+
+ unsigned (*getNumberOfCores)(void); //!< Returns the number of CPUs on this node.
+
+ /* ! @brief Called by the kernel to activate a CPU. */
+ /* ! @param[in] cpu The index of the cpu (core) to be activated. */
+ /* ! @param[in] entry The (kernel) entry point function. This function will be invoked when */
+ /* ! the CPU is actually activated. */
+ /* ! @param[in] arg A pointer to the lone argument to be passed to the entry point. */
+ /* ! @return Zero (0) if the CPU was successfully activated. Non-zero if the CPU was not */
+ /* ! activated (e.g. invalid cpu argument, or the cpu has already been */
+ /* ! activated). */
+ /* ! @remarks See Section x of the Common Node Services overview for details. */
+ int (*takeCPU)(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg));
+
+
+ /*--------------------------------------*/
+ /*--- Services for Blue Gene devices ---*/
+ /*--------------------------------------*/
+
+ /* ! @brief Checks active devices for a clean termination state and returns 0 */
+ /* ! if everything is nominal. Returns non-zero if any anomaly is */
+ /* ! detected and logs violations. */
+ /* ! @param[in] job_rc specifies the return code of the job that is terminating. */
+ int (*terminationCheck)(int job_rc);
+
+ /*-------------------------------*/
+ /*--- Services for interrupts ---*/
+ /*-------------------------------*/
+
+
+ /* ! @brief Enables the specified interrupt. For all interrupts except inter-processor */
+ /* ! interrupts, the interrupt will be handled by the specified core. */
+ /* ! @param[in] group Specifies the Blue Gene interrupt group */
+ /* ! @param[in] irq Specifies the interrupt index within the group */
+ /* ! @param[in] itype Specifies the type of interrupt that hardware will present */
+ /* ! for this group/irq. */
+ /* ! @param[in] core Specifies which core will handle the interrupt. If specified as */
+ /* ! BGCNS_ALL_CORE_BROADCAST, then all cores will handle the interrupt. */
+ /* ! @return Returns zero (0) if the interrupt is enabled and returns non-zero if it was not */
+ /* ! (including the case of bad arguments). */
+ int (*enableInterrupt)(unsigned group, unsigned irq, BGCNS_InterruptType itype, unsigned core);
+
+ /* ! @brief Disables the specified interrupt. */
+ /* ! @param[in] group Specifies the Blue Gene interrupt group */
+ /* ! @param[in] irq Specifies the interrupt index within the group */
+ /* ! @return Returns zero (0) if the interrupt is disabled and returns non-zero if it was not */
+ /* ! (including the case of bad arguments). */
+ int (*disableInterrupt)(unsigned group, unsigned irq);
+
+ /* ! @brief Queries the Blue Gene interrupt hardware for interrupts of the given */
+ /* ! type and returns the group/IRQ. This service is typically used in the */
+ /* ! context of an interrupt handler. Since multiple interrupt conditions */
+ /* ! may be present, the service is typically invoked from the handler */
+ /* ! (along with corresponding acknowledgement) until the return code */
+ /* ! indicates that no more interrupts are present. */
+ /* ! @param[out] group Specifies the Blue Gene interrupt group. The value is valid */
+ /* ! only when the return code is 0. */
+ /* ! @param[out] irq Specifies the interrupt index within the group. The value is */
+ /* ! valid only when the return code is zero. */
+ /* ! @param[in] itype Specifies the type of interrupt being queried. */
+ /* ! @return Returns zero (0) if an interrupt condition of the specified type exists. Returns -1 */
+ /* ! if no such condition exists. */
+ int (*getInterrupt)(BGCNS_InterruptType itype, unsigned* group, unsigned* irq);
+
+ /* ! @brief Acknowledges the specified interrupt, thus clearing the interrupt */
+ /* ! condition in the interrupt controller hardware. */
+ /* ! @param[in] group Specifies the Blue Gene interrupt group */
+ /* ! @param[in] irq Specifies the interrupt index within the group */
+ /* ! @return Returns zero (0) if the interrupt is acknowledged and returns non-zero if it was not */
+ /* ! (including the case of bad arguments). */
+ /* ! @remarks Note that for some interrupts, it is not sufficient to only acknowledge */
+ /* ! the interrupt; the hardware condition that triggered the interrupt may */
+ /* ! also need to be cleared. */
+ int (*acknowledgeInterrupt)(unsigned group, unsigned irq);
+
+ /* ! @brief Raises the specified interrupt. */
+ /* ! @param[in] group Specifies the Blue Gene interrupt group */
+ /* ! @param[in] irq Specifies the interrupt index within the group */
+ int (*raiseInterrupt)(unsigned group, unsigned irq);
+
+
+ /*------------------------*/
+ /*--- Mailbox services ---*/
+ /*------------------------*/
+
+ unsigned (*getMailboxMaximumConsoleInputSize)(void); //!< Returns the actual maximum console message input data size.
+ unsigned (*getMailboxMaximumConsoleOutputSize)(void); //!< Returns the actual maximum console message output data size.
+
+ /* ! @brief Writes a text message to the output mailbox. */
+ /* ! @param[in] msg a pointer to the message to be written. */
+ /* ! @param[in] msglen the length (in bytes) of the message to be written. */
+ /* ! @remarks As with all common services, the message data area must be mapped via */
+ /* ! the TLB when the service is called. The behavior is not defined if this */
+ /* ! is not the case. */
+ /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */
+ /* ! wrong (including a message that is too large). */
+ int (*writeToMailboxConsole)(char *msg, unsigned msglen);
+
+ /* ! @brief Writes a text message to the output mailbox but does not wait for a */
+ /* ! response back from the control system. When this service is used, */
+ /* ! the caller must poll for completion using the testForOutboxCompletion */
+ /* ! service. */
+ /* ! @param[in] msg a pointer to the message to be written. */
+ /* ! @param[in] msglen the length (in bytes) of the message to be written. */
+ /* ! @remarks As with all common services, the message data area must be mapped via */
+ /* ! the TLB when the service is called. The behavior is not defined if this */
+ /* ! is not the case. */
+ /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */
+ /* ! wrong (including a message that is too large). */
+ int (*writeToMailboxConsole_nonBlocking)(char* msg, unsigned msglen);
+
+ /* ! @brief Tests the outbox to see if the last message was picked up by the control */
+ /* ! system. */
+ /* ! @return Zero (0) if the last message was picked up and returns non-zero if it has not. */
+ /* ! @remarks Typically the caller will invoke this service after having called */
+ /* ! writeToMailboxConsole_nonBlocking and will then invoke this service in a */
+ /* ! loop until zero is returned. */
+ int (*testForOutboxCompletion)(void);
+
+ /* ! @brief Reads a message from the input mail box. */
+ /* ! @param buf a pointer to a data area into which the message will be placed. */
+ /* ! @param bufsize gives the size of the data area, i.e. the largest message */
+ /* ! that may be safely received into the buffer. */
+ /* ! @return The actual length of the message (0 if no message was received). */
+ /* ! @remarks As with all common services, the message data area must be mapped */
+ /* ! via the TLB when this service is called. The results are not defined if */
+ /* ! this is not the case. */
+ unsigned (*readFromMailboxConsole)(char *buf, unsigned bufsize);
+
+ int (*testInboxAttention)(void); //!< Returns 1 if something is available in the input mailbox.
+
+ int (*_no_longer_in_use_1_)(void); //!< Obsolete ... do not use.
+
+ int (*writeToMailbox)(void* message, unsigned length, unsigned cmd);
+
+ /*------------------------------------*/
+ /*--- RAS and diagnostic services ---*/
+ /*------------------------------------*/
+
+ /* ! @brief TBD */
+ void (*machineCheck)(void *regs);
+
+ /* ! @brief Writes a RAS event to the log. */
+ /* ! @param[in] facility The facility (aka component). */
+ /* ! @param[in] unit The unit (aka subcomponent). */
+ /* ! @param[in] err_code The error code. */
+ /* ! @param[in] numDetails The number of additional details. */
+ /* ! @param[in] details The list of additional details. */
+ /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+ /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */
+ int (*writeRASEvent)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] );
+
+ /* ! @brief Writes a RAS string to the log. */
+ /* ! @param[in] facility The facility (aka component). */
+ /* ! @param[in] unit The unit (aka subcomponent). */
+ /* ! @param[in] err_code The error code. */
+ /* ! @param[in] str The message string being written (ASCII encoded, null-terminated). Note that the length of this string is */
+ /* ! limited to _BGP_RAS_ASCII_MAX_LEN characters. The implementation may choose to truncate the string if it exceeds this */
+ /* ! length. */
+ /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */
+ /* ! string was truncated). */
+ /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */
+ int (*writeRASString)( unsigned facility, unsigned unit, unsigned short err_code, char* str );
+
+
+ /*---------------------------------*/
+ /*--- Global Interrupt services ---*/
+ /*---------------------------------*/
+
+ /* ! @brief A global (compute node) barrier. This call will block until all other compute nodes */
+ /* ! in the partition also arrive at the barrier. */
+ int (*globalBarrier)(void);
+
+ /* ! @brief A global (compute node) barrier. This call will block until all other compute nodes */
+ /* ! in the partition also arrive at the barrier or until the timeout is reached. */
+ /* ! @param timeoutInMillis specifies the timeout duration. Units are milliseconds. */
+ /* ! @return BGCNS_RC_COMPLETE if the barrier completed. BGCNS_RC_TIMEOUT if the barrier timed */
+ /* ! out. BGCNS_RC_ERROR if some other error occurred. */
+ int (*globalBarrierWithTimeout)(unsigned timeoutInMillis);
+
+
+
+ /*-------------------------*/
+ /*--- Network services ---*/
+ /*-------------------------*/
+
+
+ void (*initializeNetworks)(void); //!< @todo Is this is going away??? Talk to Andy
+
+ void (*_no_longer_in_use_381)(void); //!< @warning Do not use
+
+ void (*_no_longer_in_use_384)(void);//!< @warning Do not use
+
+
+ /*--------------------------*/
+ /*--- DMA unit services ---*/
+ /*--------------------------*/
+
+#define BGCNS_DMA_CAPTURE_X_PLUS 0 //!< watch the X+ receiver
+#define BGCNS_DMA_CAPTURE_X_MINUS 1 //!< watch the X- receiver
+#define BGCNS_DMA_CAPTURE_Y_PLUS 2 //!< watch the Y+ receiver
+#define BGCNS_DMA_CAPTURE_Y_MINUS 3 //!< watch the Y- receiver
+#define BGCNS_DMA_CAPTURE_Z_PLUS 4 //!< watch the Z+ receiver
+#define BGCNS_DMA_CAPTURE_Z_MINUS 5 //!< watch the Z- receiver
+#define BGCNS_DMA_CAPTURE_DISABLE 7 //!< disable link capturing
+
+ /* ! @brief Sets the link capture facility of the DMA unit to watch the specified */
+ /* ! receiver (or disable). */
+ /* ! @param[in] link Specifies the link being monitored. Use the BGCNS_DMA_CAPTURE_* */
+ /* ! mnemonics defined above. */
+ /* ! @return Zero if the operation succeeded, non-zero if it did not (e.g. an invalid */
+ /* ! link was specified). */
+ int (*setDmaLinkCapture)(int link);
+
+ /* ! @brief Clears the link capture unit so that another packet can be captured. */
+ void (*clearDmaLinkCapture)(void);
+
+#define BGCNS_RC_DMA_NO_PACKET_CAPTURED 0
+#define BGCNS_RC_DMA_CAPTURE_UNIT_ERROR -1
+#define BGCNS_RC_DMA_DATA_CONFLICT -2 //!< if initial read indicates a bad packet is captured but subsequent read shows bad packet not captured
+#define BGCNS_RC_DMA_DATA_CONFLICT2 -3 //!< if bad packet is captured, but all the bytes are the same
+ /* ! @brief Reads the DMA link capture packets. */
+ int (*readDmaLinkCapturePackets)(unsigned char* good_packet, int* good_packet_size, unsigned char* bad_packet, int* bad_packet_size);
+
+
+#define BGCNS_DMA_ALL_GROUPS 0xFFFFFFFF
+
+ /* ! @brief Sets FIFO controls for the DMA unit. */
+ /* ! */
+ /* ! An operation on facility BGCNS_InjectionFifo enables or disables a subset of the 128 DMA injection FIFOs. */
+ /* ! The FIFOs are organized into four groups of 32. The mask argument is a bit mask (bit i controls the i-th imFIFO */
+ /* ! within that group, that is the (group*32)+i imFIFO). */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionFifo enables or disables a subset of the 32 DMA reception FIFOs. */
+ /* ! The group argument is ignored and the mask argument is a bit mask (bit i controls the i-th reception FIFO). */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionHeaderFifo enables or disables the header FIFO for the specified */
+ /* ! group. The mask argument is ignored. Note that the header FIFO is typically used for debugging. */
+ /* ! */
+ /* ! An operation on facility BGCNS_InjectionFifoInterrupt enables or disables threshold interrupts for the */
+ /* ! specified injection FIFO. Threshold interrupts occur if available space is less than the configured */
+ /* ! threshold when the FIFO is used for a remote get operation. The group and mask arguments are as */
+ /* ! described in the BGCNS_InjectionFifo operation (above). */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionFifoInterrupt enables or disables interrupts for the specified */
+ /* ! reception FIFO(s). If enabled, an interrupt will occur when the reception FIFO's available space drops */
+ /* ! below the configured threshold. The group argument selects the interrupt type (type 0, 1, 2 or 3). */
+ /* ! The mask argument is a bit mask selecting one or more of the 32 normal reception FIFOs. */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionHeaderFifoInterrupt enables or disables interrupts for the specified */
+ /* ! reception header FIFO. Reception header FIFOs are used for debug purposes only. */
+ /* ! */
+ /* ! An operation on facility BGCNS_InjectionCounterInterrupt enables or disables "Counter Hit Zero" interrupts. */
+ /* ! The group argument does not specify counter group, but rather specifies interrupt 0, 1, 2 or 3. The mask */
+ /* ! argument is a bit mask that selects one or more counter subgroups to operate on (the 256 injection counters */
+ /* ! are partitioned into 32 subgroups of 8 counters). */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionCounterInterrupt enables or disables "Counter Hit Zero" interrupts */
+ /* ! for reception counters. The group and mask arguments are the same as described in the */
+ /* ! BGCNS_InjectionCounterInterrupt operation (above). */
+ /* ! */
+ /* ! The buffer argument is used as a means to save/restore in an opaque manner. This is achieved by passing */
+ /* ! a non-NULL buffer to a disable operation and subsequently passing that buffer during a reenable */
+ /* ! operation (the buffer is used to snapshot state). */
+ /* ! */
+ /* ! */
+ /* ! @code */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | Facility | group | mask | Notes | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_InjectionFifo | 0..3 | 32 bits | [1] | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_ReceptionFifo | n/a | 32 bits | [2] | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_ReceptionHeaderFifo | 0..3, ALL | N/A | | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_InjectionFifoInterrupt | 0..3 | 32 bits | [1] | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_ReceptionFifoInterrupt | 0..3 | 32 bits | [3] | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_InjectionCounterInterrupt | 0..3 | 32 bits | [3][4]| */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_ReceptionCounterInterrupt | 0..3 | 32 bits | [3][4]| */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! */
+ /* ! [1] There are 128 injection FIFOs partitioned into 4 groups of 32. */
+ /* ! [2] There are 32 normal reception FIFOs in BG/P. */
+ /* ! [3] There are 4 interrupt lines. The group argument selects one of these 4. */
+ /* ! [4] There are 256 counters of each type (injection and reception). The */
+ /* ! 32-bit mask partitions them into groups of 8. */
+ /* ! */
+ /* ! @endcode */
+ /* ! */
+ /* ! @param[in] operation defines the type of operation being performed (enable, disable, or re-enable). */
+ /* ! @param[in] facility defines the type of FIFO being configured. */
+ /* ! @param[in] group is interpreted differently based on the facility. */
+ /* ! @param[in] mask is interpreted differently based on the facility. */
+ /* ! @param[out] buffer is interpreted differently based on the operation and facility. It is generally used to capture */
+ /* ! a copy of the facility's current state in an enable operation (and may be null, in which case it is ignored). It is */
+ /* ! generally used as the value to be loaded in a re-enable operation. In this manner, a state value captured by an enable */
+ /* ! operation may be easily restored by a subsequent re-enable operation. The buffer argument is generally ignored by */
+ /* ! disable operations. */
+ int (*setDmaFifoControls)(BGCNS_FifoOperation operation, BGCNS_FifoFacility facility, unsigned group, unsigned mask, unsigned* buffer);
+
+ /* ! @brief Maps injection FIFOs onto physical (torus hardware) FIFOs. */
+ /* ! @param[in] group specifies the injection FIFO group. */
+ /* ! @param[in] fifoIds is an array of length numberOfFifos whose elements are the identifiers of the imFIFO (within that */
+ /* ! given group). */
+ /* ! @param[in] injection_map is an array of length numberOfFifos whose elements are 8-bit masks identifying which of the */
+ /* ! physical torus injection FIFOs are mapped. Bits 0-3 correspond to torus group 0, and bits 4-7 correspond to torus */
+ /* ! group 1. Bits 3 and 7 are the high priority FIFOs. */
+ /* ! @param[in] numberOfFifos describes the number of elements contained in the fifoIds and injection_map arguments. */
+ /* ! @return Zero if the map was properly set. Non-zero if it was not, including the case of illegal arguments. */
+ /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */
+ /* ! argument is 0..3 and the legal range for the fifoIds[] elements is 0..31. */
+
+ int (*setDmaInjectionMap)(unsigned group, unsigned fifoIds[], unsigned char injection_map[], unsigned numberOfFifos);
+
+ /* ! @brief Enables or disables "local copy" behavior for the specified injection FIFOs. A local copy injection FIFO */
+ /* ! can be used to perform memory copies within a node via the DMA engine. */
+ /* ! @param[in] operation specifies whether local copies is being enabled or disabled on the specified FIFOs. The BGCNS_Reenable */
+ /* ! operation is not supported. */
+ /* ! @param[in] group specifies the injection FIFO group. */
+ /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */
+ /* ! @return Zero if the operation succeeded; non-zero if it did not. */
+ /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */
+ /* ! argument is 0..3. */
+ int (*setDmaLocalCopies)(BGCNS_FifoOperation operation, unsigned group, unsigned bits);
+
+ /* ! @brief Enables or disables the priority bit for the specified injection FIFOs. The priority bit */
+ /* ! is used by the hardware arbitration (details are not further documented here). */
+ /* ! @param[in] operation specifies whether priority bits are being set or cleared. */
+ /* ! @param[in] group specifies the injection FIFO group. */
+ /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */
+ /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */
+ /* ! argument is 0..3. */
+ int (*setDmaPriority)(BGCNS_FifoOperation operation, unsigned group, unsigned bits);
+
+ /* ! @brief Sets the mapping from physical (torus hardware) reception FIFOs to reception FIFOs. The hardware supports */
+ /* ! 8 torus FIFOs (six torus dimensions plus high priority plus local copy). Furthermore, the hardware supports */
+ /* ! 4 groups as derived from the PID0 and PID1 bits of the DMA packet. Thus the mapping is a 4 x 8 matrix of */
+ /* ! reception FIFO ids. */
+ /* ! @param[in] torus_reception_map maps {group} X {torus-hardware-FIFOs} --> reception FIFOs. */
+ /* ! @param[in] fifo_types is an array of N values specifying the type of each normal reception FIFO (see also threshold). For BGP, */
+ /* ! N=2 (there are 32 normal reception FIFOs). */
+ /* ! @param[in] header_types is an array of N values specifying the type of each reception header FIFO (see also threshold). For */
+ /* ! BGP, N=4 (there are 4 reception header FIFOs). Note that reception header FIFOs are typically only used for debugging purposes. */
+ /* ! @param[in] threshold is an array of N threshold values. The value threshold[i] specifies the threshold value for reception */
+ /* ! FIFO type i. If reception FIFO interrupts are enabled (see setDmaFifoControls) and a reception FIFO's available space drops */
+ /* ! below its threshold, an interrupt is driven. For BGP, N=2 (there are type 0 and type 1 injection FIFOs). */
+ int (*setDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned header_types[], unsigned threshold[]);
+
+ /* ! @brief Gets the reception map. */
+ /* ! @see setDmaReceptionMap for descriptions of the map and arguments. */
+ int (*getDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned short* store_headers, unsigned header_types[], unsigned threshold[]);
+
+
+ /* ! @deprecated */
+ int (*_used_to_be_clearDmaFullReceptionFifo__removed)(void);
+
+
+ /* ! @brief Resets the MAC unit's PHY. */
+ /* ! @return Zero if the unit was properly reset. Returns non-zero if some error occurred. */
+ /* ! @deprecated See macResetPHY_nonBlocking. */
+ int (*macResetPHY)(void);
+
+ /* ! @brief Tests the MAC unit's link. */
+ /* ! @param[in] link_type specifies the type of link to be tested. */
+ /* ! @return One (1) if the link is active; zero (0) if it is not. */
+ /* ! @deprecated See macTestLink_nonBlocking */
+ int (*macTestLink)(BGCNS_LinkType link_type);
+
+ /* ! @brief Reads one of the MAC's XGMII registers. */
+ /* ! @param[in] device_address */
+ /* ! @param[in] port_address */
+ /* ! @param[in] register_address */
+ /* ! @return The register's value or a negative number if some error occurred. */
+ /* ! @deprecated Low level MAC register access is being eliminated. */
+ int (*macXgmiiRead)(unsigned device_address, unsigned port_address, unsigned register_address);
+
+ /* ! @brief Writes one of the MAC's XGMII registers. */
+ /* ! @param[in] device_address */
+ /* ! @param[in] port_address */
+ /* ! @param[in] register_address */
+ /* ! @param[in] value */
+ /* ! @return Zero (0) if the register was successfully written; non-zero if some error occurred. */
+ /* ! @deprecated Low level MAC register access is being eliminated. */
+ int (*macXgmiiWrite)(unsigned device_address, unsigned port_address, unsigned register_address, unsigned value);
+
+
+ /* ! @brief Trains SerDes in a non-blocking manner. The standard usage is to initiate */
+ /* ! training with trainSerDes(1), check the return code, and then continue to invoke */
+ /* ! trainSerDes(0) as long as the return code is BGCNS_RC_CONTINUE. */
+ /* ! @param[in] reset Should be 1 when initiating a retraining sequence and 0 for any */
+ /* ! continuations. */
+ /* ! @return BGCNS_RC_CONTINUE if training is still ongoing (the caller should re-invoke */
+ /* ! the service again (with reset=0). BGCNS_RC_COMPLETE if training is complete. */
+ /* ! BGCNS_RC_ERROR if some error has occurred. */
+ int (*trainSerDes)(int reset);
+
+ /* ! @brief Fetches the value of the specified control parameter of the environmental monitor. */
+ /* ! @param[in] parameter Parameter to retrieve. Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */
+ /* ! @param[in] value Pointer to the storage location that will contain the parameter's value when the function successfully returns. */
+ /* ! @return Zero if the register was successfully fetched; non-zero if some error occurred. */
+ int (*getEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int* value);
+
+ /* ! @brief Stores a value to the specified control parameter of the environmental monitor */
+ /* ! @param[in] parameter Parameter to store. Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */
+ /* ! @param[in] value New value for the parameter */
+ /* ! @return Zero if the register was successfully fetched; non-zero if some error occurred. */
+ int (*setEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int value);
+
+ /* ! @brief Performs checks and ensures that the node will continue to operate within tolerances. */
+ /* ! @note MUST be called regularly as indicated by nextCallbackTime parameter */
+ /* ! @param[in] nextCallbackTime Upon returning, this will contain the PPC Timebase register value indicating when the next */
+ /* ! time the operating system needs to call performEnvMgmt. Failure to do so may result in poorly performing */
+ /* ! nodes or shutdown of the block / rack. */
+ int (*performEnvMgmt)(unsigned long long* nextCallbackTime);
+
+
+ /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */
+ /* ! response back from the control system. When this service is used, */
+ /* ! the caller must poll for completion using the testForOutboxCompletion */
+ /* ! service. */
+ /* ! @param[in] facility The facility (aka component). See bgp_ras.h for a list of facilities. */
+ /* ! @param[in] unit The unit (aka subcomponent). See bgp_ras.h for a list of units. */
+ /* ! @param[in] err_code The error code. See bgp_ras.h for a list of error codes. */
+ /* ! @param[in] numDetails The number of additional details. */
+ /* ! @param[in] details The list of additional details. */
+ /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+ int (*writeRASEvent_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] );
+
+ /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */
+ /* ! response back from the control system. When this service is used, */
+ /* ! the caller must poll for completion using the testForOutboxCompletion */
+ /* ! service. */
+ /* ! @param[in] facility The facility (aka component). See bgp_ras.h for a list of facilities. */
+ /* ! @param[in] unit The unit (aka subcomponent). See bgp_ras.h for a list of units. */
+ /* ! @param[in] err_code The error code. See bgp_ras.h for a list of error codes. */
+ /* ! @param[in] str The message string being written (ASCII encoded, null-terminated). Note that the length of this string is */
+ /* ! limited to _BGP_RAS_ASCII_MAX_LEN characters. The implementation may choose to truncate the string if it exceeds this */
+ /* ! length. */
+ /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */
+ /* ! string was truncated). */
+ /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+ int (*writeRASString_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, char* str );
+
+ /* ! @brief Sets the core's timebase registers to the specified value. */
+ /* ! @param[in] newtime The new 64-bit timebase */
+ /* ! @return Zero if the timebase was successfully set, non-zero if some error condition occurred. */
+ /* ! @deprecated */
+ int (*synchronizeTimebase)(unsigned long long newtime);
+
+ /* ! @brief Sets the node's DMA physical protection settings. */
+ /* ! @note on BGP, there are a maximum of 8 read ranges and 8 write ranges */
+ /* ! @return Zero if the DMA ranges were set, non-zero if some error condition occurred. */
+ int (*dmaSetRange)(unsigned numreadranges, unsigned long long* read_lower_paddr, unsigned long long* read_upper_paddr,
+ unsigned numwriteranges, unsigned long long* write_lower_paddr, unsigned long long* write_upper_paddr);
+
+ /* ! @brief Checks the status of the devices and reports correctible RAS (if any) */
+ /* ! @param[in] clear_error_counts If non-zero, function will also reset the hardware error counters after posting any RAS. */
+ /* ! @return Zero if successful, non-zero if some error condition occurred. */
+ int (*statusCheck)(unsigned clear_error_counts);
+
+ /* ! @brief Stops the DMA and clears any reception unit failure */
+ int (*stopDma)(void);
+
+ /* ! @brief Starts the DMA */
+ int (*startDma)(void);
+
+ /* ! @brief Performs a hard exit. The status code is provided to the control system. */
+ /* ! @return This service never returns. */
+ void (*exit)(int rc);
+
+ /* ! @brief Resets the MAC unit's PHY but does not block. */
+ /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+ /* ! reset sequence. That is, callers should initiate a reset sequence with reset=1 and then */
+ /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+ /* ! reset=0. */
+ /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+ /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+ /* ! to indicate that it needs additional time. */
+ /* ! @return BGCNS_RC_COMPLETE if the unit was properly reset. BGCNS_RC_CONTINUE if the reset operation is */
+ /* ! not yet complete. BGCNS_RC_ERROR if the reset operation failed. */
+ int (*macResetPHY_nonBlocking)(int reset, unsigned timeoutInMillis);
+
+ /* ! @brief Tests the MAC unit's link but does not block. */
+ /* ! @param[in] link_type specifies the type of link to be tested. */
+ /* ! @param[out] result points to the link status, which is valid only when the return code is */
+ /* ! BGCNS_RC_COMPLETE. A value of one (1) indicates that the link is active; zero (0) */
+ /* ! indicates that it is inactive. */
+ /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+ /* ! test link sequence. That is, callers should initiate a sequence with reset=1 and then */
+ /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+ /* ! reset=0. */
+ /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+ /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+ /* ! to indicate that it needs additional time. */
+ /* ! @return BGCNS_RC_COMPLETE if the test is complete (result is valid only in this case). BGCNS_RC_CONTINUE */
+ /* ! if the reset operation is not yet complete. BGCNS_RC_ERROR if the reset operation failed. */
+ int (*macTestLink_nonBlocking)(BGCNS_LinkType link_type, unsigned* result, int reset, unsigned timeoutInMillis);
+
+ void * _not_in_use_1068;
+ void * _not_in_use_1069;
+
+
+ /* ! @brief Indicates that a new job is about to start. */
+ /* ! @return Zero (0) if CNS is ready for a new job to start. Returns non-zero otherwise. */
+ int (*startNextJob)(void);
+
+ /* ! @brief Indicates that the CNS should use the specified virtual address when accessing the */
+ /* ! given device. When a device is remapped, CNS will no longer make any attempt to map */
+ /* ! a TLB to access that device -- it is the responsibility of the kernel to handle the */
+ /* ! TLB either proactively or reactively (via a fault). */
+ /* ! @param[in] device specifies the device being mapped. */
+ /* ! @param[in] base_address is the root virtual address of the device. The address should be */
+ /* ! naturally aligned (relative to the size of the device). See the section Reserved and */
+ /* ! Preferred Addresses for more information. */
+ /* ! @return Zero (0) if the device was successfully remapped. Returns non-zero if it was not. */
+ /* ! @remarks The lock box is in active use by CNS during early boot and thus it is not */
+ /* ! possible to remap the BGCNS_LockBox device until all cores are activated by the kernel */
+ /* ! (that is, takeCPU has been called for all cores). */
+ int (*mapDevice)(BGCNS_DeviceMasks device, void* base_address);
+
+ /* ! @brief Enables barriers on the specified channel. */
+ /* ! @param channel specifies the channel being enabled. */
+ /* ! @param user_mode indicates whether the barrier is to be used in user-mode code. */
+ /* ! @return Zero if global barriers were enabled. Returns non-zero if the request could not be */
+ /* ! completed, including the case of attempting to enable a reserved channel. */
+ int (*enableBarrier)(unsigned int channel, int user_mode);
+
+ /* ! @brief Disables barriers on the specified channel. */
+ /* ! @return Zero if global barriers were disabled. Returns non-zero if the request could not be */
+ /* ! completed, including the case of attempting to disable a reserved channel. */
+ int (*disableBarrier)(unsigned int channel);
+
+ /* ! @brief A global barrier that does not block indefinitely. */
+ /* ! @param channel indicates the GLINT hardware channel to use. */
+ /* ! @param reset indicates whether this is the beginning (1) or a continuation (0) of a barrier */
+ /* ! sequence. That is, caller should initiate a barrier operation by passing reset=1 and then, */
+ /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke the service again with */
+ /* ! reset=0. */
+ /* ! @param timeoutInMillis is the (approximate) number of milliseconds that this service is allowed */
+ /* ! to wait for barrier participants before returning to the caller. */
+ /* ! @return BGCNS_RC_COMPLETE indicates that all participants have arrived at the barrier. BGCNS_RC_CONTINUE */
+ /* ! indicates that not all participants arrived within the allotted timeout period. BGCNS_RC_ERROR */
+ /* ! indicates that some other problem has been detected. */
+ /* ! @remarks This service is not thread safe. It is considered a programming error to invoke it */
+ /* ! from multiple threads concurrently and the behavior is not defined. */
+ int (*globalBarrier_nonBlocking)(unsigned channel, int reset, unsigned timeoutInMillis);
+
+ /* ! @brief Restart kernel in cycle reproducibility mode. */
+ /* ! @return Zero if no restart was required for reproducibility. */
+ /* ! @remarks This service must be called from each core and only after all I/O operations have been completed. */
+ /* ! Processors will be reset and kernels will start again. */
+ int (*setupReproducibility)(void);
+
+} BGCNS_ServiceDirectory;
+
+/* ! @deprecated */
+/* ! @typedef BGCNS_DeprecatedServicesDirectory */
+/* ! @struct _BGCNS_DeprecatedServices */
+/* ! @brief These services exist for historical reasons and are not further documented here. */
+/* ! They may not be available in future releases of CNS. */
+typedef struct _BGCNS_DeprecatedServices {
+ int (*torusTermCheck)(int* nonFatalRc);
+ int (*torusLinkErrCheck)(int* nonFatalRc);
+ int (*torusCRCExchange)(void);
+ int (*collectiveConfigureClassInternal)(unsigned virtualTree, unsigned short specifier);
+ int (*collectiveConfigureClass)(unsigned virtualTree, unsigned short specifier);
+ unsigned (*collectiveGetClass)(unsigned virtualTree);
+ int (*collectiveInit)(void);
+ int (*collectiveRelease)(void);
+ int (*collectiveHardReset)(void);
+ int (*netbusTermCheck)(void);
+ unsigned (*getSerDesLinkStatus)(void);
+ int (*dmaTermCheck)(void);
+} BGCNS_DeprecatedServicesDirectory;
+
+/* ! @typedef BGCNS_Descriptor */
+/* ! @struct _BGCNS_Descriptor */
+/* ! @brief The Common Node Services descriptor. This descriptor provides information to the kernel regarding */
+/* ! the CNS memory region as well as a service directory. The descriptor is passed to the kernel */
+/* ! upon boot and must not be altered by the kernel. */
+typedef struct _BGCNS_Descriptor {
+ BGCNS_ServiceDirectory* services; //!< A pointer to the services directory.
+ unsigned baseVirtualAddress; //!< The virtual address of the beginning of the CNS memory region.
+ unsigned size; //!< The size (in bytes) of the CNS memory region.
+ unsigned basePhysicalAddress; //!< The physical address of the CNS memory region.
+ unsigned basePhysicalAddressERPN; //!< The extended real page number of the CNS memory region.
+ unsigned bgcns_private_in_use; //!< Undefined. This field is for internal use only and may disappear at any time.
+ BGCNS_DeprecatedServicesDirectory* deprecatedServices; //!< @deprecated undocumented
+ unsigned version; //!< The CNS version
+} BGCNS_Descriptor;
+
+
+
+#endif /* !__ASSEMBLY */
+#endif /* _BGCNS_H */
diff --git a/arch/powerpc/boot/bgp.c b/arch/powerpc/boot/bgp.c
new file mode 100644
index 00000000000000..9aefcb125732bb
--- /dev/null
+++ b/arch/powerpc/boot/bgp.c
@@ -0,0 +1,166 @@
+/*
+ * (C) Copyright IBM Corp. 2007, 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Based on earlier code:
+ * Copyright (C) Paul Mackerras 1997.
+ *
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003, 2004 Zultys Technologies
+ *
+ * David Gibson, IBM Corporation, 2007
+ *
+ */
+#include "types.h"
+#include "ops.h"
+#include "stdio.h"
+#include "4xx.h"
+#include "44x.h"
+#include "bgcns.h"
+/* Types needed for the personality */
+typedef u8 uint8_t;
+typedef u16 uint16_t;
+typedef u32 uint32_t;
+#include "bgp_personality.h"
+
/* Blue Gene firmware jumps to 0x10.
 * Simply branch to _zimage_start which is typically 0x800000.
 * Must also link with --section-start bgstart=0
 *
 * The trampoline loads the absolute address of _zimage_start into r9,
 * moves it to the link register, and branches through it with blr.
 * ".=0x10" pads the section so the code lands exactly at offset 0x10.
 */
asm (
" .section bgstart, \"ax\"; "
" .=0x10; "
" lis %r9, _zimage_start@h; "
" ori %r9, %r9, _zimage_start@l; "
" mtlr %r9; "
" blr; "
" .previous "
);
+
+/* This will point directly to CNS which remains mapped on entry. */
+BGCNS_Descriptor* cns;
+
+static void bgp_console_write(const char *msg, int len) __attribute__((unused)) ;
+
+static void bgp_console_write(const char *msg, int len)
+{
+ if (cns)
+ cns->services->writeToMailboxConsole((char *)msg, len);
+}
+
+static void bgp_fixup_bluegene_cns(BGCNS_Descriptor *cns)
+{
+ void *node = finddevice("/ibm,bluegene/cns");
+ if (node) {
+ setprop_val(node, "base-va", cns->baseVirtualAddress);
+ setprop_val(node, "base-pa", cns->basePhysicalAddress);
+ setprop_val(node, "size", cns->size);
+ setprop_val(node, "services", cns->services);
+ setprop_val(node, "version", cns->version);
+ } else {
+ fatal("could not find /ibm,bluegene/cns node in device tree");
+ }
+}
+
+static void bgp_fixup_bluegene_personality(BGP_Personality_t *bgpers)
+{
+ void *node = finddevice("/ibm,bluegene/personality");
+ if (node) {
+ /* We could include individual fields of the personality as needed
+ * so that Linux doesn't need to decode the struct directly. We
+ * provide raw-data for external tools and daemons.
+ * This can replace /proc/personality
+ */
+ unsigned frequency = bgpers->Kernel_Config.FreqMHz * 1000000;
+ setprop(node, "raw-data", bgpers, sizeof(*bgpers));
+ setprop_val(node, "frequency", frequency);
+ } else {
+ fatal("could not find /ibm,bluegene/personality node in device tree");
+ }
+}
+
/* Fixed load address and framing of the optional boot ramdisk. */
#define BGP_INITRD_ADDR   0x1000000u   /* 16MB: firmware loads the ramdisk here */
#define BGP_INITRD_MAGIC  0xf0e1d2c3u  /* 4-byte marker preceding the image */

/* If a ramdisk is present, advertise it through the standard
 * linux,initrd-start / linux,initrd-end properties of /chosen.
 *
 * On Blue Gene we may have a gzipped ramdisk loaded at a fixed address
 * (16MB). It is preceded by a 4-byte magic value and a 4-byte big-endian
 * length; the payload begins immediately after that 8-byte header.
 */
static void bgp_fixup_bluegene_initrd(void)
{
    void *node = finddevice("/chosen");

    if (node) {
        unsigned *rd = (unsigned *)BGP_INITRD_ADDR;

        if (rd[0] == BGP_INITRD_MAGIC && rd[1] != 0) {
            unsigned initrd_start = (unsigned)(rd + 2);
            unsigned initrd_len = rd[1];
            unsigned initrd_end = initrd_start + initrd_len;

            setprop_val(node, "linux,initrd-start", initrd_start);
            setprop_val(node, "linux,initrd-end", initrd_end);
        }
    } else {
        fatal("could not find chosen node in device tree");
    }
}
+
/* Master device-tree fixup hook, run by the bootwrapper before handing
 * control to the kernel: sizes memory, sets clock frequencies, and
 * publishes the CNS descriptor, personality and initrd location. */
static void bgp_fixups(void)
{
	BGP_Personality_t *bgpers = cns->services->getPersonalityData();
	/* Linux gets the DDR minus the region CNS reserves for itself. */
	unsigned int DDRSize = (bgpers->DDR_Config.DDRSizeMB << 20) - cns->size;
	unsigned int freq = bgpers->Kernel_Config.FreqMHz * 1000000;

/* For vRNIC configurations, turn down the memory that Linux thinks is on the node so the vRNIC can map it all */
	/* NOTE(review): keys off the top nibble of the CNS-adjusted size; the
	 * 0xb0000000 cap looks empirical -- confirm against vRNIC config. */
	if ( (DDRSize & 0xf0000000 ) == 0xd0000000 ) DDRSize = 0xb0000000 ;

	dt_fixup_memory(0, DDRSize);
	dt_fixup_cpu_clocks(freq, freq, freq);

	bgp_fixup_bluegene_cns(cns);
	bgp_fixup_bluegene_personality(bgpers);
	bgp_fixup_bluegene_initrd();

	/* Dead code kept from the Ebony board port this file was based on. */
#if 0
	/* FIXME: sysclk should be derived by reading the FPGA registers */
	unsigned long sysclk = 33000000;

	ibm440gp_fixup_clocks(sysclk, 6 * 1843200);
	ibm4xx_sdram_fixup_memsize();
	dt_fixup_mac_address_by_alias("ethernet0", ebony_mac0);
	dt_fixup_mac_address_by_alias("ethernet1", ebony_mac1);
	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
	ebony_flashsel_fixup();
#endif
}
+
+
/* Bootwrapper entry point. r3 carries the CNS descriptor pointer installed
 * by the Blue Gene firmware; r4-r7 are unused here. */
void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
                   unsigned long r6, unsigned long r7)
{
	/* Must be set before any console output or fixup runs. */
	cns = (BGCNS_Descriptor*) r3;
#if defined(CONFIG_BLUEGENE_NOISY_BOOT)
	console_ops.write = bgp_console_write;
#endif

	/* Wrapper heap starting at _end; assumes the standard
	 * (base, size, granularity, max_allocs) signature -- TODO confirm. */
	simple_alloc_init(_end, 256 << 20, 32, 64);

	platform_ops.fixups = bgp_fixups;
	platform_ops.exit = ibm44x_dbcr_reset;
	fdt_init(_dtb_start);

/* serial_console_init(); */
}
diff --git a/arch/powerpc/boot/bgp_personality.h b/arch/powerpc/boot/bgp_personality.h
new file mode 100644
index 00000000000000..f4d9309640a4bf
--- /dev/null
+++ b/arch/powerpc/boot/bgp_personality.h
@@ -0,0 +1,1086 @@
+/*
+ * Andrew Tauferner
+ *
+ * Copyright 2006, 2007 International Business Machines
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef BGP_PERSONALITY_H_ // Prevent multiple inclusion
+#define BGP_PERSONALITY_H_
+
+
+
+
+/* #include <linux/types.h> */
+
// These defines allow use of IBM's bit numbering (MSb=0, LSb=31) for multi-bit fields
//   b = IBM bit number of the least significant bit of the field (highest number)
//   x = value to set in the field
//   s = field size in bits
// All constants are unsigned (U/ULL): the original signed forms invoked
// undefined behavior when a value was shifted into the sign bit (e.g.
// _BN(0) == 1 << 31). The getter macros _BG/_BG64 also fully parenthesize
// their arguments; previously `x` and `b` were unparenthesized, so e.g.
// _BG(b, a | c, s) mis-grouped as ((mask & a) | c) by operator precedence.
#define _BS(b,x,s)   ( ( (x) & ( 0x7FFFFFFFU >> ( 31 - (s) ) ) ) << ( 31 - (b) ) )
#define _BG(b,x,s)   ( ( _BS(b, 0x7FFFFFFFU, s) & (x) ) >> ( 31 - (b) ) )
#define _BS64(b,x,s) ( ( (x) & ( 0x7FFFFFFFFFFFFFFFULL >> ( 63 - (s) ) ) ) << ( 63 - (b) ) )
#define _BG64(b,x,s) ( ( _BS64(b, 0x7FFFFFFFFFFFFFFFULL, s) & (x) ) >> ( 63 - (b) ) )
#define _BN(b)    ((1U<<(31-(b))))
#define _B1(b,x)  (((x)&0x1U)<<(31-(b)))
#define _B2(b,x)  (((x)&0x3U)<<(31-(b)))
#define _B3(b,x)  (((x)&0x7U)<<(31-(b)))
#define _B4(b,x)  (((x)&0xFU)<<(31-(b)))
#define _B5(b,x)  (((x)&0x1FU)<<(31-(b)))
#define _B6(b,x)  (((x)&0x3FU)<<(31-(b)))
#define _B7(b,x)  (((x)&0x7FU)<<(31-(b)))
#define _B8(b,x)  (((x)&0xFFU)<<(31-(b)))
#define _B9(b,x)  (((x)&0x1FFU)<<(31-(b)))
#define _B10(b,x) (((x)&0x3FFU)<<(31-(b)))
#define _B11(b,x) (((x)&0x7FFU)<<(31-(b)))
#define _B12(b,x) (((x)&0xFFFU)<<(31-(b)))
#define _B13(b,x) (((x)&0x1FFFU)<<(31-(b)))
#define _B14(b,x) (((x)&0x3FFFU)<<(31-(b)))
#define _B15(b,x) (((x)&0x7FFFU)<<(31-(b)))
#define _B16(b,x) (((x)&0xFFFFU)<<(31-(b)))
#define _B17(b,x) (((x)&0x1FFFFU)<<(31-(b)))
#define _B18(b,x) (((x)&0x3FFFFU)<<(31-(b)))
#define _B19(b,x) (((x)&0x7FFFFU)<<(31-(b)))
#define _B20(b,x) (((x)&0xFFFFFU)<<(31-(b)))
#define _B21(b,x) (((x)&0x1FFFFFU)<<(31-(b)))
#define _B22(b,x) (((x)&0x3FFFFFU)<<(31-(b)))
#define _B23(b,x) (((x)&0x7FFFFFU)<<(31-(b)))
#define _B24(b,x) (((x)&0xFFFFFFU)<<(31-(b)))
#define _B25(b,x) (((x)&0x1FFFFFFU)<<(31-(b)))
#define _B26(b,x) (((x)&0x3FFFFFFU)<<(31-(b)))
#define _B27(b,x) (((x)&0x7FFFFFFU)<<(31-(b)))
#define _B28(b,x) (((x)&0xFFFFFFFU)<<(31-(b)))
#define _B29(b,x) (((x)&0x1FFFFFFFU)<<(31-(b)))
#define _B30(b,x) (((x)&0x3FFFFFFFU)<<(31-(b)))
#define _B31(b,x) (((x)&0x7FFFFFFFU)<<(31-(b)))
+
/* Universal Component Identifier (UCI) component type codes.
 * Stored in the 5-bit Component field at the top of every BGP_UCI_*_t
 * structure below, identifying which layout the remaining bits use. */
#define BGP_UCI_Component_Rack ( 0)
#define BGP_UCI_Component_Midplane ( 1)
#define BGP_UCI_Component_BulkPowerSupply ( 2)
#define BGP_UCI_Component_PowerCable ( 3)
#define BGP_UCI_Component_PowerModule ( 4)
#define BGP_UCI_Component_ClockCard ( 5)
#define BGP_UCI_Component_FanAssembly ( 6)
#define BGP_UCI_Component_Fan ( 7)
#define BGP_UCI_Component_ServiceCard ( 8)
#define BGP_UCI_Component_LinkCard ( 9)
#define BGP_UCI_Component_LinkChip (10)
#define BGP_UCI_Component_LinkPort (11) // Identifies 1 end of a LinkCable
#define BGP_UCI_Component_NodeCard (12)
#define BGP_UCI_Component_ComputeCard (13)
#define BGP_UCI_Component_IOCard (14)
#define BGP_UCI_Component_DDRChip (15)
#define BGP_UCI_Component_ENetConnector (16)
+
/* UCI location encodings. Each BGP_UCI_*_t packs a physical location into
 * 32 bits MSb-first; the companion BGP_UCI_*_ macros build the same word
 * from the _Bn() field setters (bit numbers verified against field widths).
 * The trailing _zero field pads the word to 32 bits and must be zero. */

typedef struct BGP_UCI_Rack_t
               { // "Rxy": R<RackRow><RackColumn>
                 unsigned Component  : 5;  // when BGP_UCI_Component_Rack
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned _zero      : 19; // zero's
               }
               BGP_UCI_Rack_t;

#define BGP_UCI_RACK_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_Rack
#define BGP_UCI_RACK_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_RACK_RACKCOLUMN(x) _B4(12,x)  // 0..F



typedef struct BGP_UCI_Midplane_t
               { // "Rxy-Mm": R<RackRow><RackColumn>-M<Midplane>
                 unsigned Component  : 5;  // when BGP_UCI_Component_Midplane
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned Midplane   : 1;  // 0=Bottom, 1=Top
                 unsigned _zero      : 18; // zero's
               }
               BGP_UCI_Midplane_t;

#define BGP_UCI_MIDPLANE_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_Midplane
#define BGP_UCI_MIDPLANE_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_MIDPLANE_RACKCOLUMN(x) _B4(12,x)  // 0..F
#define BGP_UCI_MIDPLANE_MIDPLANE(x)   _B1(13,x)  // 0=Bottom, 1=Top


typedef struct BGP_UCI_BulkPowerSupply_t
               { // "Rxy-B": R<RackRow><RackColumn>-B
                 unsigned Component  : 5;  // when BGP_UCI_Component_BulkPowerSupply
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned _zero      : 19; // zero's
               }
               BGP_UCI_BulkPowerSupply_t;

#define BGP_UCI_BULKPOWERSUPPLY_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_BulkPowerSupply
#define BGP_UCI_BULKPOWERSUPPLY_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_BULKPOWERSUPPLY_RACKCOLUMN(x) _B4(12,x)  // 0..F



typedef struct BGP_UCI_PowerCable_t
               { // "Rxy-B-C": R<RackRow><RackColumn>-B-C
                 unsigned Component  : 5;  // when BGP_UCI_Component_PowerCable
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned _zero      : 19; // zero's
               }
               BGP_UCI_PowerCable_t;

#define BGP_UCI_POWERCABLE_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_PowerCable
#define BGP_UCI_POWERCABLE_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_POWERCABLE_RACKCOLUMN(x) _B4(12,x)  // 0..F



typedef struct BGP_UCI_PowerModule_t
               { // "Rxy-B-Pp": R<RackRow><RackColumn>-B-P<PowerModule>
                 unsigned Component   : 5;  // when BGP_UCI_Component_PowerModule
                 unsigned RackRow     : 4;  // 0..F
                 unsigned RackColumn  : 4;  // 0..F
                 unsigned PowerModule : 3;  // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
                 unsigned _zero       : 16; // zero's
               }
               BGP_UCI_PowerModule_t;

#define BGP_UCI_POWERMODULE_COMPONENT(x)   _B5( 4,x)  // when BGP_UCI_Component_PowerModule
#define BGP_UCI_POWERMODULE_RACKROW(x)     _B4( 8,x)  // 0..F
#define BGP_UCI_POWERMODULE_RACKCOLUMN(x)  _B4(12,x)  // 0..F
#define BGP_UCI_POWERMODULE_POWERMODULE(x) _B3(15,x)  // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
+
+
typedef struct BGP_UCI_ClockCard_t
               { // "Rxy-K": R<RackRow><RackColumn>-K
                 unsigned Component  : 5;  // when BGP_UCI_Component_ClockCard
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned _zero      : 19; // zero's
               }
               BGP_UCI_ClockCard_t;

#define BGP_UCI_CLOCKCARD_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_ClockCard (comment previously said PowerModule -- copy/paste error)
#define BGP_UCI_CLOCKCARD_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_CLOCKCARD_RACKCOLUMN(x) _B4(12,x)  // 0..F
+
+
+
/* Cooling and service-card locations (bit positions match field widths above). */

typedef struct BGP_UCI_FanAssembly_t
               { // "Rxy-Mm-Aa": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>
                 unsigned Component   : 5;  // when BGP_UCI_Component_FanAssembly
                 unsigned RackRow     : 4;  // 0..F
                 unsigned RackColumn  : 4;  // 0..F
                 unsigned Midplane    : 1;  // 0=Bottom, 1=Top
                 unsigned FanAssembly : 4;  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
                 unsigned _zero       : 14; // zero's
               }
               BGP_UCI_FanAssembly_t;

#define BGP_UCI_FANASSEMBLY_COMPONENT(x)   _B5( 4,x)  // when BGP_UCI_Component_FanAssembly
#define BGP_UCI_FANASSEMBLY_RACKROW(x)     _B4( 8,x)  // 0..F
#define BGP_UCI_FANASSEMBLY_RACKCOLUMN(x)  _B4(12,x)  // 0..F
#define BGP_UCI_FANASSEMBLY_MIDPLANE(x)    _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_FANASSEMBLY_FANASSEMBLY(x) _B4(17,x)  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)



typedef struct BGP_UCI_Fan_t
               { // "Rxy-Mm-Aa-Ff": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>-F<Fan>
                 unsigned Component   : 5;  // when BGP_UCI_Component_Fan
                 unsigned RackRow     : 4;  // 0..F
                 unsigned RackColumn  : 4;  // 0..F
                 unsigned Midplane    : 1;  // 0=Bottom, 1=Top
                 unsigned FanAssembly : 4;  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
                 unsigned Fan         : 2;  // 0..2 (0=Tailstock, 2=Midplane)
                 unsigned _zero       : 12; // zero's
               }
               BGP_UCI_Fan_t;

#define BGP_UCI_FAN_COMPONENT(x)   _B5( 4,x)  // when BGP_UCI_Component_Fan
#define BGP_UCI_FAN_RACKROW(x)     _B4( 8,x)  // 0..F
#define BGP_UCI_FAN_RACKCOLUMN(x)  _B4(12,x)  // 0..F
#define BGP_UCI_FAN_MIDPLANE(x)    _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_FAN_FANASSEMBLY(x) _B4(17,x)  // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
#define BGP_UCI_FAN_FAN(x)         _B2(19,x)  // 0..2 (0=Tailstock, 2=Midplane)

typedef struct BGP_UCI_ServiceCard_t
               { // "Rxy-Mm-S": R<RackRow><RackColumn>-M<Midplane>-S
                 unsigned Component  : 5;  // when BGP_UCI_Component_ServiceCard
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned Midplane   : 1;  // 0=Bottom, 1=Top (Master ServiceCard in M0)
                 unsigned _zero      : 18; // zero's
               }
               BGP_UCI_ServiceCard_t;

#define BGP_UCI_SERVICECARD_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_ServiceCard
#define BGP_UCI_SERVICECARD_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_SERVICECARD_RACKCOLUMN(x) _B4(12,x)  // 0..F
#define BGP_UCI_SERVICECARD_MIDPLANE(x)   _B1(13,x)  // 0=Bottom, 1=Top (Master ServiceCard in M0)
+
+
+
/* Link-card hierarchy: card -> chip / port (one port = one end of a LinkCable). */

typedef struct BGP_UCI_LinkCard_t
               { // "Rxy-Mm-Ll": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>
                 unsigned Component  : 5;  // when BGP_UCI_Component_LinkCard
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned Midplane   : 1;  // 0=Bottom, 1=Top
                 unsigned LinkCard   : 2;  // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
                 unsigned _zero      : 16; // zero's
               }
               BGP_UCI_LinkCard_t;

#define BGP_UCI_LINKCARD_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_LinkCard
#define BGP_UCI_LINKCARD_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_LINKCARD_RACKCOLUMN(x) _B4(12,x)  // 0..F
#define BGP_UCI_LINKCARD_MIDPLANE(x)   _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_LINKCARD_LINKCARD(x)   _B2(15,x)  // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)



typedef struct BGP_UCI_LinkChip_t
               { // "Rxy-Mm-Ll-Uu": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-U<LinkChip>
                 unsigned Component  : 5;  // when BGP_UCI_Component_LinkChip
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned Midplane   : 1;  // 0=Bottom, 1=Top
                 unsigned LinkCard   : 2;  // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
                 unsigned LinkChip   : 3;  // 00..05: left to right from Front
                 unsigned _zero      : 13; // zero's
               }
               BGP_UCI_LinkChip_t;

#define BGP_UCI_LINKCHIP_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_LinkChip
#define BGP_UCI_LINKCHIP_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_LINKCHIP_RACKCOLUMN(x) _B4(12,x)  // 0..F
#define BGP_UCI_LINKCHIP_MIDPLANE(x)   _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_LINKCHIP_LINKCARD(x)   _B2(15,x)  // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
#define BGP_UCI_LINKCHIP_LINKCHIP(x)   _B3(18,x)  // 00..05: left to right from Front

typedef struct BGP_UCI_LinkPort_t
               { // "Rxy-Mm-Ll-Jjj": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-J<LinkPort>
                 unsigned Component  : 5;  // when BGP_UCI_Component_LinkPort
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned Midplane   : 1;  // 0=Bottom, 1=Top
                 unsigned LinkCard   : 2;  // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
                 unsigned LinkPort   : 4;  // 00..15: left to right from Front
                 unsigned _zero      : 12; // zero's
               }
               BGP_UCI_LinkPort_t;

#define BGP_UCI_LINKPORT_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_LinkPort
#define BGP_UCI_LINKPORT_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_LINKPORT_RACKCOLUMN(x) _B4(12,x)  // 0..F
#define BGP_UCI_LINKPORT_MIDPLANE(x)   _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_LINKPORT_LINKCARD(x)   _B2(15,x)  // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
#define BGP_UCI_LINKPORT_LINKPORT(x)   _B4(19,x)  // 00..15: left to right from Front
+
+
/* Node-card hierarchy: node card -> compute/IO card -> DDR chip / Ethernet connector. */

typedef struct BGP_UCI_NodeCard_t
               { // "Rxy-Mm-Nnn": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>
                 unsigned Component  : 5;  // when BGP_UCI_Component_NodeCard
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned Midplane   : 1;  // 0=Bottom, 1=Top
                 unsigned NodeCard   : 4;  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
                 unsigned _zero      : 14; // zero's
               }
               BGP_UCI_NodeCard_t;

#define BGP_UCI_NODECARD_COMPONENT(x)  _B5( 4,x)  // when BGP_UCI_Component_NodeCard
#define BGP_UCI_NODECARD_RACKROW(x)    _B4( 8,x)  // 0..F
#define BGP_UCI_NODECARD_RACKCOLUMN(x) _B4(12,x)  // 0..F
#define BGP_UCI_NODECARD_MIDPLANE(x)   _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_NODECARD_NODECARD(x)   _B4(17,x)  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)



typedef struct BGP_UCI_ComputeCard_t
               { // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
                 unsigned Component   : 5;  // when BGP_UCI_Component_ComputeCard
                 unsigned RackRow     : 4;  // 0..F
                 unsigned RackColumn  : 4;  // 0..F
                 unsigned Midplane    : 1;  // 0=Bottom, 1=Top
                 unsigned NodeCard    : 4;  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
                 unsigned ComputeCard : 6;  // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
                 unsigned _zero       : 8;  // zero's
               }
               BGP_UCI_ComputeCard_t;

#define BGP_UCI_COMPUTECARD_COMPONENT(x)   _B5( 4,x)  // when BGP_UCI_Component_ComputeCard
#define BGP_UCI_COMPUTECARD_RACKROW(x)     _B4( 8,x)  // 0..F
#define BGP_UCI_COMPUTECARD_RACKCOLUMN(x)  _B4(12,x)  // 0..F
#define BGP_UCI_COMPUTECARD_MIDPLANE(x)    _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_COMPUTECARD_NODECARD(x)    _B4(17,x)  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
#define BGP_UCI_COMPUTECARD_COMPUTECARD(x) _B6(23,x)  // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)


typedef struct BGP_UCI_IOCard_t
               { // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
                 unsigned Component   : 5;  // when BGP_UCI_Component_IOCard
                 unsigned RackRow     : 4;  // 0..F
                 unsigned RackColumn  : 4;  // 0..F
                 unsigned Midplane    : 1;  // 0=Bottom, 1=Top
                 unsigned NodeCard    : 4;  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
                 unsigned ComputeCard : 6;  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
                 unsigned _zero       : 8;  // zero's
               }
               BGP_UCI_IOCard_t;

#define BGP_UCI_IOCARD_COMPONENT(x)   _B5( 4,x)  // when BGP_UCI_Component_IOCard
#define BGP_UCI_IOCARD_RACKROW(x)     _B4( 8,x)  // 0..F
#define BGP_UCI_IOCARD_RACKCOLUMN(x)  _B4(12,x)  // 0..F
#define BGP_UCI_IOCARD_MIDPLANE(x)    _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_IOCARD_NODECARD(x)    _B4(17,x)  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
#define BGP_UCI_IOCARD_COMPUTECARD(x) _B6(23,x)  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)



typedef struct BGP_UCI_DDRChip_t
               { // "Rxy-Mm-Nnn-Jxx-Uuu": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>-U<DDRChip>
                 unsigned Component   : 5;  // when BGP_UCI_Component_DDRChip
                 unsigned RackRow     : 4;  // 0..F
                 unsigned RackColumn  : 4;  // 0..F
                 unsigned Midplane    : 1;  // 0=Bottom, 1=Top
                 unsigned NodeCard    : 4;  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
                 unsigned ComputeCard : 6;  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
                                            // NOTE(review): "00..01" looks copy-pasted from IOCard;
                                            // DDR chips presumably exist on compute cards too -- confirm range.
                 unsigned DDRChip     : 5;  // 00..20
                 unsigned _zero       : 3;  // zero's
               }
               BGP_UCI_DDRChip_t;

#define BGP_UCI_DDRCHIP_COMPONENT(x)   _B5( 4,x)  // when BGP_UCI_Component_DDRChip
#define BGP_UCI_DDRCHIP_RACKROW(x)     _B4( 8,x)  // 0..F
#define BGP_UCI_DDRCHIP_RACKCOLUMN(x)  _B4(12,x)  // 0..F
#define BGP_UCI_DDRCHIP_MIDPLANE(x)    _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_DDRCHIP_NODECARD(x)    _B4(17,x)  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
#define BGP_UCI_DDRCHIP_COMPUTECARD(x) _B6(23,x)  // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
#define BGP_UCI_DDRCHIP_DDRCHIP(x)     _B5(28,x)  // 00..20


typedef struct BGP_UCI_ENetConnector_t
               { // "Rxy-Mm-Nnn-ENe": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-EN<EN>
                 unsigned Component  : 5;  // when BGP_UCI_Component_ENetConnector
                 unsigned RackRow    : 4;  // 0..F
                 unsigned RackColumn : 4;  // 0..F
                 unsigned Midplane   : 1;  // 0=Bottom, 1=Top
                 unsigned NodeCard   : 4;  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
                 unsigned EN         : 1;  // 0..1 (Equal to IOCard number)
                 unsigned _zero      : 13; // zero's
               }
               BGP_UCI_ENetConnector_t;

#define BGP_UCI_ENETCONNECTOR_COMPONENT(x)     _B5( 4,x)  // when BGP_UCI_Component_ENetConnector
#define BGP_UCI_ENETCONNECTOR_RACKROW(x)       _B4( 8,x)  // 0..F
#define BGP_UCI_ENETCONNECTOR_RACKCOLUMN(x)    _B4(12,x)  // 0..F
#define BGP_UCI_ENETCONNECTOR_MIDPLANE(x)      _B1(13,x)  // 0=Bottom, 1=Top
#define BGP_UCI_ENETCONNECTOR_NODECARD(x)      _B4(17,x)  // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
#define BGP_UCI_ENETCONNECTOR_ENETCONNECTOR(x) _B1(18,x)  // 0..1 (Equal to IOCard number)
+
+
+
/* A 32-bit UCI word viewed either raw (UCI) or through the layout selected
 * by its 5-bit Component field (one member per BGP_UCI_Component_* code). */
typedef union TBGP_UniversalComponentIdentifier
               {
                 uint32_t                  UCI;
                 BGP_UCI_Rack_t            Rack;
                 BGP_UCI_Midplane_t        Midplane;
                 BGP_UCI_BulkPowerSupply_t BulkPowerSupply;
                 BGP_UCI_PowerCable_t      PowerCable;
                 BGP_UCI_PowerModule_t     PowerModule;
                 BGP_UCI_ClockCard_t       ClockCard;
                 BGP_UCI_FanAssembly_t     FanAssembly;
                 BGP_UCI_Fan_t             Fan;
                 BGP_UCI_ServiceCard_t     ServiceCard;
                 BGP_UCI_LinkCard_t        LinkCard;
                 BGP_UCI_LinkChip_t        LinkChip;
                 BGP_UCI_LinkPort_t        LinkPort;
                 BGP_UCI_NodeCard_t        NodeCard;
                 BGP_UCI_ComputeCard_t     ComputeCard;
                 BGP_UCI_IOCard_t          IOCard;
                 BGP_UCI_DDRChip_t         DDRChip;
                 BGP_UCI_ENetConnector_t   ENetConnector;
               }
               BGP_UniversalComponentIdentifier;
+
+
+
/* Version stamp of this personality layout. */
#define BGP_PERSONALITY_VERSION (0x0A)

/* Default core clock in MHz. */
#define BGP_DEFAULT_FREQ (850)

/* Process/core assignment words. NOTE(review): the values suggest one byte
 * per process, each a bitmask of that process's cores (e.g. VNM=0x08040201
 * is four single-core processes); DIAGS' high nibble appears to add
 * privilege bits -- confirm against the CNK documentation. */
#define BGP_PERS_PROCESSCONFIG_DIAGS (0xFF000000)      // Diagnostic Mode: All Cores Enabled and Privileged in Process 0
#define BGP_PERS_PROCESSCONFIG_SMP (0x0F000000)        // All Cores Enabled User-Space in Process 0
#define BGP_PERS_PROCESSCONFIG_VNM (0x08040201)        // 4 Single-Core Processes (a.k.a. Virtual Nodes)
#define BGP_PERS_PROCESSCONFIG_2x2 (0x0C030000)        // 2 Processes of 2 Cores each in same DP unit
#define BGP_PERS_PROCESSCONFIG_2x2_CROSS1 (0x09060000) // 2 Processes of 2 Cores in different DP units
#define BGP_PERS_PROCESSCONFIG_2x2_CROSS2 (0x0A050000) // 2 Processes of 2 Cores in different DP units
#define BGP_PERS_PROCESSCONFIG_3PLUS1 (0x0E010000)     // 3 Cores in one Processes, 4th Core in Separate Process
#define BGP_PERS_PROCESSCONFIG_DEFAULT (BGP_PERS_PROCESSCONFIG_DIAGS)


// Personality.Kernel_Config.RASPolicy
#define BGP_PERS_RASPOLICY_VERBOSITY(x) _B2( 1,x)                  // Verbosity as shown below
#define BGP_PERS_RASPOLICY_MINIMAL BGP_PERS_RASPOLICY_VERBOSITY(0) // Benchmarking Level of Capture and Reporting
#define BGP_PERS_RASPOLICY_NORMAL BGP_PERS_RASPOLICY_VERBOSITY(1)  // Normal Production Level of Capture and Reporting
#define BGP_PERS_RASPOLICY_VERBOSE BGP_PERS_RASPOLICY_VERBOSITY(2) // Manufacturing Test and Diagnostics
#define BGP_PERS_RASPOLICY_EXTREME BGP_PERS_RASPOLICY_VERBOSITY(3) // Report Every Event Immediately - Thresholds set to 1
#define BGP_PERS_RASPOLICY_FATALEXIT _BN( 2)                       // Fatal is Fatal, so exit.

#define BGP_PERS_RASPOLICY_DEFAULT (BGP_PERS_RASPOLICY_VERBOSE | BGP_PERS_RASPOLICY_FATALEXIT)
+
+
+#define BGP_PERSONALITY_LEN_NFSDIR (32) // 32bytes
+
+#define BGP_PERSONALITY_LEN_SECKEY (32) // 32bytes
+
+// Personality.NodeConfig Driver Enables and Configurations
+#define BGP_PERS_ENABLE_Simulation _BN( 0) // Running on VHDL Simulation
+#define BGP_PERS_ENABLE_LockBox _BN( 1)
+#define BGP_PERS_ENABLE_BIC _BN( 2)
+#define BGP_PERS_ENABLE_DDR _BN( 3) // DDR Controllers (not Fusion DDR model)
+#define BGP_PERS_ENABLE_LoopBack _BN( 4) // LoopBack: Internal TS/TR or SerDes Loopback
+#define BGP_PERS_ENABLE_GlobalInts _BN( 5)
+#define BGP_PERS_ENABLE_Collective _BN( 6) // Enable Collective Network
+#define BGP_PERS_ENABLE_Torus _BN( 7)
+#define BGP_PERS_ENABLE_TorusMeshX _BN( 8) // Torus is a Mesh in the X-dimension
+#define BGP_PERS_ENABLE_TorusMeshY _BN( 9) // Torus is a Mesh in the Y-dimension
+#define BGP_PERS_ENABLE_TorusMeshZ _BN(10) // Torus is a Mesh in the Z-dimension
+#define BGP_PERS_ENABLE_TreeA _BN(11) // Enable Collective Network A-link
+#define BGP_PERS_ENABLE_TreeB _BN(12) // Enable Collective Network B-link
+#define BGP_PERS_ENABLE_TreeC _BN(13) // Enable Collective Network C-link
+#define BGP_PERS_ENABLE_DMA _BN(14)
+#define BGP_PERS_ENABLE_SerDes _BN(15)
+#define BGP_PERS_ENABLE_UPC _BN(16)
+#define BGP_PERS_ENABLE_EnvMon _BN(17)
+#define BGP_PERS_ENABLE_Ethernet _BN(18)
+#define BGP_PERS_ENABLE_JTagLoader _BN(19) // Converse with JTag Host to load kernel
+#define BGP_PERS_ENABLE_MailBoxReceive BGP_PERS_ENABLE_JTagLoader
+#define BGP_PERS_ENABLE_PowerSave _BN(20) // Turn off unused devices (Eth on CN, TS on ION)
+#define BGP_PERS_ENABLE_FPU _BN(21) // Enable Double-Hummers (not supported in EventSim)
+#define BGP_PERS_ENABLE_StandAlone _BN(22) // Disable "CIOD" interface, Requires Collective!
+#define BGP_PERS_ENABLE_TLBMisses _BN(23) // TLB Misses vs Wasting Memory (see bgp_AppSetup.c)
+#define BGP_PERS_ENABLE_Mambo _BN(24) // Running under Mambo? Used by Linux
+#define BGP_PERS_ENABLE_TreeBlast _BN(25) // Enable Tree "Blast" mode
+#define BGP_PERS_ENABLE_BlindStacks _BN(26) // For "XB" Tests, Lock 16K Stacks in Blind Device
+#define BGP_PERS_ENABLE_CNK_Malloc _BN(27) // Enable Malloc Support in CNK.
+#define BGP_PERS_ENABLE_Reproducibility _BN(28) // Enable Cycle Reproducibility
+#define BGP_PERS_ENABLE_HighThroughput _BN(29) // Enable high throughput computing mode
+#define BGP_PERS_ENABLE_DiagnosticsMode _BN(30) // Enable diagnostics mode
+
// Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back)
// This overrides most L1, L2, and Snoop settings. Careful!
#define BGP_PERS_ENABLE_BGLMODE _BN(31) // (not yet fully implemented)
+
+// Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave
+#define BGP_PERS_NODECONFIG_DEFAULT (BGP_PERS_ENABLE_Simulation |\
+ BGP_PERS_ENABLE_LockBox |\
+ BGP_PERS_ENABLE_BIC |\
+ BGP_PERS_ENABLE_DDR |\
+ BGP_PERS_ENABLE_LoopBack |\
+ BGP_PERS_ENABLE_GlobalInts |\
+ BGP_PERS_ENABLE_Collective |\
+ BGP_PERS_ENABLE_Torus |\
+ BGP_PERS_ENABLE_UPC |\
+ BGP_PERS_ENABLE_EnvMon |\
+ BGP_PERS_ENABLE_FPU |\
+ BGP_PERS_ENABLE_StandAlone)
+
+// Default Setup for Hardware:
+// Supports Stand-Alone CNA Applications.
+// Bootloader-Extensions and XB's must turn-off JTagLoader
+#define BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (BGP_PERS_ENABLE_JTagLoader |\
+ BGP_PERS_ENABLE_LockBox |\
+ BGP_PERS_ENABLE_BIC |\
+ BGP_PERS_ENABLE_DDR |\
+ BGP_PERS_ENABLE_GlobalInts |\
+ BGP_PERS_ENABLE_Collective |\
+ BGP_PERS_ENABLE_SerDes |\
+ BGP_PERS_ENABLE_UPC |\
+ BGP_PERS_ENABLE_EnvMon |\
+ BGP_PERS_ENABLE_FPU |\
+ BGP_PERS_ENABLE_StandAlone)
+
+// these fields are defined by the control system depending on compute/io node
+// BGP_PERS_ENABLE_Torus |
+// BGP_PERS_ENABLE_TorusMeshX |
+// BGP_PERS_ENABLE_TorusMeshY |
+// BGP_PERS_ENABLE_TorusMeshZ |
+
+
+
+// Personality.L1Config: Controls and Settings for L1 Cache
+#define BGP_PERS_L1CONFIG_L1I _BN( 0) // L1 Enabled for Instructions
+#define BGP_PERS_L1CONFIG_L1D _BN( 1) // L1 Enabled for Data
+#define BGP_PERS_L1CONFIG_L1SWOA _BN( 2) // L1 Store WithOut Allocate
+#define BGP_PERS_L1CONFIG_L1Recovery _BN( 3) // L1 Full Recovery Mode
+#define BGP_PERS_L1CONFIG_L1WriteThru _BN( 4) // L1 Write-Thru (not svc_host changeable (yet?))
+#define BGP_PERS_L1CONFIG_DO_L1ITrans _BN( 5) // Enable L1 Instructions Transient?
+#define BGP_PERS_L1CONFIG_DO_L1DTrans _BN( 6) // Enable L1 Data Transient?
+ // unused 9bits: 7..15
+#define BGP_PERS_L1CONFIG_L1ITrans(x) _B8(23,x) // L1 Transient for Instructions in Groups of 16 Lines
+#define BGP_PERS_L1CONFIG_L1DTrans(x) _B8(31,x) // L1 Transient for Data in Groups of 16 Lines
+
+#define BGP_PERS_L1CONFIG_DEFAULT (BGP_PERS_L1CONFIG_L1I |\
+ BGP_PERS_L1CONFIG_L1D |\
+ BGP_PERS_L1CONFIG_L1SWOA |\
+ BGP_PERS_L1CONFIG_L1Recovery |\
+ BGP_PERS_L1CONFIG_L1WriteThru)
+
/* L1 configuration word, viewable raw or by field. The bitfields mirror the
 * BGP_PERS_L1CONFIG_* bit definitions above (l1i == IBM bit 0, etc.).
 * NOTE(review): relies on the compiler allocating bitfields MSb-first, as
 * on big-endian PowerPC ABIs -- not portable to little-endian targets. */
typedef union TBGP_Pers_L1Cfg
               {
                 uint32_t l1cfg;
                 struct {
                        unsigned l1i         : 1;  // L1 Enabled for Instructions
                        unsigned l1d         : 1;  // L1 Enabled for Data
                        unsigned l1swoa      : 1;  // L1 Store WithOut Allocate
                        unsigned l1recovery  : 1;  // L1 Full Recovery Mode
                        unsigned l1writethru : 1;  // L1 Write-Thru
                        unsigned do_l1itrans : 1;  // apply l1itrans below?
                        unsigned do_l1dtrans : 1;  // apply l1dtrans below?
                        unsigned l1rsvd      : 9;  // unused
                        unsigned l1itrans    : 8;  // transient instruction lines, groups of 16
                        unsigned l1dtrans    : 8;  // transient data lines, groups of 16
                        };
               }
               BGP_Pers_L1Cfg;
+
+// Personality.L2Config: Controls and Settings for L2 and Snoop
+#define BGP_PERS_L2CONFIG_L2I _BN( 0) // L2 Instruction Caching Enabled
+#define BGP_PERS_L2CONFIG_L2D _BN( 1) // L2 Data Caching Enabled
+#define BGP_PERS_L2CONFIG_L2PF _BN( 2) // L2 Automatic Prefetching Enabled
+#define BGP_PERS_L2CONFIG_L2PFO _BN( 3) // L2 Optimistic Prefetching Enabled
+#define BGP_PERS_L2CONFIG_L2PFA _BN( 4) // L2 Aggressive Prefetching Enabled (fewer deeper streams)
+#define BGP_PERS_L2CONFIG_L2PFS _BN( 5) // L2 Aggressive Many-Stream Prefetching Enabled (deeper only when available buffers)
+#define BGP_PERS_L2CONFIG_Snoop _BN( 6) // Just NULL Snoop Filter
+#define BGP_PERS_L2CONFIG_SnoopCache _BN( 7) // Snoop Caches
+#define BGP_PERS_L2CONFIG_SnoopStream _BN( 8) // Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata)
+#define BGP_PERS_L2CONFIG_SnoopRange _BN( 9) // Snoop Range Filter when possible
+#define BGP_PERS_L2CONFIG_BUG824LUMPY _BN(10) // BPC_BUGS 824: Fix with Lumpy Performance
+#define BGP_PERS_L2CONFIG_BUG824SMOOTH _BN(11) // BPC_BUGS 824: Fix with Smooth Performance, but -12% Memory
+#define BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12) // Special for Snoop diagnostics. See bgp_vmm.c
+ // additional bits may be used for Snoop setting tweaks
+
+// Default L2 Configuration:
+// L2 Enabled with Multi-Stream Aggressive Prefetching
+// Snoop Enabled with all filters except Range
+#define BGP_PERS_L2CONFIG_DEFAULT (BGP_PERS_L2CONFIG_L2I |\
+ BGP_PERS_L2CONFIG_L2D |\
+ BGP_PERS_L2CONFIG_L2PF |\
+ BGP_PERS_L2CONFIG_L2PFO |\
+ BGP_PERS_L2CONFIG_L2PFS |\
+ BGP_PERS_L2CONFIG_Snoop |\
+ BGP_PERS_L2CONFIG_SnoopCache |\
+ BGP_PERS_L2CONFIG_SnoopStream|\
+ BGP_PERS_L2CONFIG_BUG824LUMPY)
+
+
+// Personality.L3Config: Controls and Settings for L3
+// Note: Most bits match BGP_L3x_CTRL DCRs.
+// See arch/include/bpcore/bgl_l3_dcr.h
+#define BGP_PERS_L3CONFIG_L3I _BN( 0) // L3 Enabled for Instructions
+#define BGP_PERS_L3CONFIG_L3D _BN( 1) // L3 Enabled for Data
+#define BGP_PERS_L3CONFIG_L3PFI _BN( 2) // Inhibit L3 Prefetch from DDR
+#define BGP_PERS_L3CONFIG_DO_Scratch _BN( 3) // Set up Scratch?
+#define BGP_PERS_L3CONFIG_DO_PFD0 _BN( 4) // Adjust PFD0?
+#define BGP_PERS_L3CONFIG_DO_PFD1 _BN( 5) // Adjust PFD1?
+#define BGP_PERS_L3CONFIG_DO_PFDMA _BN( 6) // Adjust PFDMA?
+#define BGP_PERS_L3CONFIG_DO_PFQD _BN( 7) // Adjust PFQD?
+ // 8..15 unused/available
+#define BGP_PERS_L3CONFIG_Scratch(x) _B4(19,x) // Scratch 8ths: 0..8
+#define BGP_PERS_L3CONFIG_PFD0(x) _B3(22,x) // Prefetch Depth for DP0
+#define BGP_PERS_L3CONFIG_PFD1(x) _B3(25,x) // Prefetch Depth for DP1
+#define BGP_PERS_L3CONFIG_PFDMA(x) _B3(28,x) // Prefetch Depth for DMA
+#define BGP_PERS_L3CONFIG_PFQD(x) _B3(31,x) // Prefetch Queue Depth
+
+// General L3 Configuration
+typedef union TBGP_Pers_L3Cfg
+ {
+ uint32_t l3cfg;
+ struct {
+ unsigned l3i : 1;
+ unsigned l3d : 1;
+ unsigned l3pfi : 1;
+ unsigned do_scratch : 1;
+ unsigned do_pfd0 : 1;
+ unsigned do_pfd1 : 1;
+ unsigned do_pfdma : 1;
+ unsigned do_pfqd : 1;
+ unsigned rsvd : 8;
+ unsigned scratch : 4;
+ unsigned pfd0 : 3;
+ unsigned pfd1 : 3;
+ unsigned pfdma : 3;
+ unsigned pfqd : 3;
+ };
+ }
+ BGP_Pers_L3Cfg;
+
+// Default L3 Configuration:
+// L3 Enabled for Instructions and Data
+// No Prefetch Depth overrides, No Scratch, No Scrambling.
+#define BGP_PERS_L3CONFIG_DEFAULT (BGP_PERS_L3CONFIG_L3I |\
+ BGP_PERS_L3CONFIG_L3D |\
+ BGP_PERS_L3CONFIG_DO_PFDMA |\
+ BGP_PERS_L3CONFIG_PFDMA(4))
+
+
+// L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users)
+#define BGP_PERS_L3SELECT_DO_CacheSel _BN( 0) // Adjust Cache Select setting?
+#define BGP_PERS_L3SELECT_DO_BankSel _BN( 1) // Adjust Bank Select setting?
+#define BGP_PERS_L3SELECT_Scramble _BN( 2) // L3 Scramble
+#define BGP_PERS_L3SELECT_PFby2 _BN( 3) // Prefetch by 2 if set, else by 1 (default) if clear.
+#define BGP_PERS_L3SELECT_CacheSel(x) _B5( 8,x) // PhysAddr Bit for L3 Selection (0..26)
+#define BGP_PERS_L3SELECT_BankSel(x) _B5(13,x) // PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel.
+
+typedef union TBGP_Pers_L3Select // 32-bit overlay of Personality.L3Select (bit meanings: BGP_PERS_L3SELECT_* above)
+ {
+ uint32_t l3select;
+ struct { // NOTE(review): matches IBM bit numbering only with MSB-first bit-field packing — compiler-dependent, confirm
+ unsigned do_CacheSel : 1;
+ unsigned do_BankSel : 1;
+ unsigned l3Scramble : 1;
+ unsigned l3_PF_by2 : 1; // default is PreFetch by 1.
+ unsigned CacheSel : 5; // Physical Address Bit for L3 Selection (0..26)
+ unsigned BankSel : 5; // 0..26 Must be strictly greater than CacheSel.
+ unsigned rsvd : 18;
+ };
+ }
+ BGP_Pers_L3Select;
+
+// Default L3 Selection Configuration: Disable overrides, but set h/w default values.
+#define BGP_PERS_L3SELECT_DEFAULT (BGP_PERS_L3SELECT_CacheSel(21) |\
+ BGP_PERS_L3SELECT_BankSel(26))
+
+// Tracing Masks and default trace configuration
+#define BGP_TRACE_CONFIG _BN( 0) // Display Encoded personality config on startup
+#define BGP_TRACE_ENTRY _BN( 1) // Function enter and exit
+#define BGP_TRACE_INTS _BN( 2) // Standard Interrupt Dispatch
+#define BGP_TRACE_CINTS _BN( 3) // Critical Interrupt Dispatch
+#define BGP_TRACE_MCHK _BN( 4) // Machine Check Dispatch
+#define BGP_TRACE_SYSCALL _BN( 5) // System Calls
+#define BGP_TRACE_VMM _BN( 6) // Virtual Memory Manager
+#define BGP_TRACE_DEBUG _BN( 7) // Debug Events (app crashes etc)
+#define BGP_TRACE_TORUS _BN( 8) // Torus Init
+#define BGP_TRACE_TREE _BN( 9) // Tree Init
+#define BGP_TRACE_GLOBINT _BN(10) // Global Interrupts
+#define BGP_TRACE_DMA _BN(11) // DMA Setup
+#define BGP_TRACE_SERDES _BN(12) // SerDes Init
+#define BGP_TRACE_TESTINT _BN(13) // Test Interface, ECID, Config
+#define BGP_TRACE_ETHTX _BN(14) // Ethernet Transmit
+#define BGP_TRACE_ETHRX _BN(15) // Ethernet Receive
+#define BGP_TRACE_POWER _BN(16) // Power Control
+#define BGP_TRACE_PROCESS _BN(17) // Process/Thread Mapping
+#define BGP_TRACE_EXIT_SUM _BN(18) // Report Per-Core Interrupt and Error Summary on exit()
+#define BGP_TRACE_SCHED _BN(19) // Report Scheduler Information
+#define BGP_TRACE_RAS _BN(20) // Report RAS Events (in addition to sending to Host)
+#define BGP_TRACE_ECID _BN(21) // Report UCI and ECID on boot
+#define BGP_TRACE_FUTEX _BN(22) // Trace Futex operations
+#define BGP_TRACE_MemAlloc _BN(23) // Trace MMAP and Shared Memory operations
+#define BGP_TRACE_WARNINGS _BN(30) // Trace Warnings
+#define BGP_TRACE_VERBOSE _BN(31) // Verbose Tracing Modifier
+
+// Enable tracking of Regression Suite coverage and report UCI+ECID on boot
+#define BGP_PERS_TRACE_DEFAULT (BGP_TRACE_CONFIG | BGP_TRACE_ECID)
+
+
+typedef struct BGP_Personality_Kernel_t // Kernel section of the personality (defaults: static initializers below)
+ {
+ uint32_t UniversalComponentIdentifier; // see include/common/bgp_ras.h
+
+ uint32_t FreqMHz; // Clock_X1 Frequency in MegaHertz (eg 1000)
+
+ uint32_t RASPolicy; // Verbosity level, and other RAS Reporting Controls
+
+ // Process Config:
+ // Each byte represents a process (1 to 4 processes supported)
+ // No core can be assigned to more than 1 process.
+ // Cores assigned to no process are disabled.
+ // Cores within a process share the same address space.
+ // Separate processes have distinct address spaces.
+ // Within each process (0 to 4 cores assigned to a process):
+ // Lower nibble is bitmask of which core belongs to that process.
+ // Upper nibble is bitmask whether that thread is privileged or user.
+ // Processes with zero cores do not exist.
+ // E.g., for Diagnostics, we use 0xFF000000, which means
+ // that all 4 cores run privileged in process 0.
+ uint32_t ProcessConfig;
+
+ uint32_t TraceConfig; // Kernel Tracing Enables (BGP_TRACE_* bits above)
+ uint32_t NodeConfig; // Kernel Driver Enables
+ uint32_t L1Config; // L1 Config and setup controls
+ uint32_t L2Config; // L2 and Snoop Config and setup controls
+ uint32_t L3Config; // L3 Config and setup controls (see BGP_Pers_L3Cfg)
+ uint32_t L3Select; // L3 Cache and Bank Selection controls (see BGP_Pers_L3Select)
+
+ uint32_t SharedMemMB; // Memory to Reserve for Sharing among Processes
+
+ uint32_t ClockStop0; // Upper 11Bits of ClockStop, enabled if Non-zero
+ uint32_t ClockStop1; // Lower 32Bits of ClockStop, enabled if Non-zero
+ }
+ BGP_Personality_Kernel_t;
+
+
+// Defaults for DDR Config
+#define BGP_PERS_DDR_PBX0_DEFAULT (0x411D1512) // PBX DCRs setting (in IBM bit numbering)
+#define BGP_PERS_DDR_PBX1_DEFAULT (0x40000000) // PBX DCRs setting (in IBM bit numbering)
+#define BGP_PERS_DDR_MemConfig0_DEFAULT (0x81fc4080) // MemConfig
+#define BGP_PERS_DDR_MemConfig1_DEFAULT (0x0C0ff800) // MemConfig
+#define BGP_PERS_DDR_ParmCtl0_DEFAULT (0x3216c008) // Parm Control
+#define BGP_PERS_DDR_ParmCtl1_DEFAULT (0x4168c323) // Parm Control
+#define BGP_PERS_DDR_MiscCtl0_DEFAULT (0) // Misc. Control
+#define BGP_PERS_DDR_MiscCtl1_DEFAULT (0) // Misc. Control
+#define BGP_PERS_DDR_CmdBufMode0_DEFAULT (0x00400fdf) // Command Buffer Mode
+#define BGP_PERS_DDR_CmdBufMode1_DEFAULT (0xffc80600) // Command Buffer Mode
+#define BGP_PERS_DDR_RefrInterval0_DEFAULT (0xD1000002) // Refresh Interval
+#define BGP_PERS_DDR_RefrInterval1_DEFAULT (0x04000000) // Refresh Interval
+#define BGP_PERS_DDR_ODTCtl0_DEFAULT (0) // ODT Control
+#define BGP_PERS_DDR_ODTCtl1_DEFAULT (0) // ODT Control
+#define BGP_PERS_DDR_DataStrobeCalib0_DEFAULT (0x08028a64) // Data Strobe Calibration
+#define BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805) // Data Strobe Calibration
+#define BGP_PERS_DDR_DQSCtl_DEFAULT (0x00000168) // DQS Control
+#define BGP_PERS_DDR_Throttle_DEFAULT (0) // DDR Throttle
+//1#define BGP_PERS_DDR_DDRSizeMB_DEFAULT (4096) // Total DDR size in MegaBytes (512MB - 16384MB).
+#define BGP_PERS_DDR_DDRSizeMB_DEFAULT (1024) // Total DDR size in MegaBytes (512MB - 16384MB).
+//1#define BGP_PERS_DDR_Chips_DEFAULT (0x0B) // Type of DDR chips
+#define BGP_PERS_DDR_Chips_DEFAULT (0x09) // Type of DDR chips
+#define BGP_PERS_DDR_CAS_DEFAULT (4) // CAS Latency (3, 4, or 5)
+
+
+#define BGP_PERS_DDRFLAGS_ENABLE_Scrub _BN(0) // Enable DDR Slow Scrub when 1
+
+// DDRFLAGS default: Enable Slow Scrub.
+#define BGP_PERS_DDRFLAGS_DEFAULT (BGP_PERS_DDRFLAGS_ENABLE_Scrub)
+
+#define BGP_PERS_SRBS0_DEFAULT (0)
+#define BGP_PERS_SRBS1_DEFAULT (0)
+
+typedef struct BGP_Personality_DDR_t // DDR controller setup; defaults are the BGP_PERS_DDR_*_DEFAULT macros above
+ {
+ uint32_t DDRFlags; // Misc. Flags and Settings (BGP_PERS_DDRFLAGS_*)
+ uint32_t SRBS0; // Controller 0 SRBS/CK Settings
+ uint32_t SRBS1; // Controller 1 SRBS/CK Settings
+ uint32_t PBX0; // PBX DCRs setting (in IBM bit numbering)
+ uint32_t PBX1; // PBX DCRs setting (in IBM bit numbering)
+ uint32_t MemConfig0; // MemConfig
+ uint32_t MemConfig1; // MemConfig
+ uint32_t ParmCtl0; // Parm Control
+ uint32_t ParmCtl1; // Parm Control
+ uint32_t MiscCtl0; // Misc. Control
+ uint32_t MiscCtl1; // Misc. Control
+ uint32_t CmdBufMode0; // Command Buffer Mode
+ uint32_t CmdBufMode1; // Command Buffer Mode
+ uint32_t RefrInterval0; // Refresh Interval
+ uint32_t RefrInterval1; // Refresh Interval
+ uint32_t ODTCtl0; // ODT Control
+ uint32_t ODTCtl1; // ODT Control
+ uint32_t DataStrobeCalib0; // Data Strobe Calibration
+ uint32_t DataStrobeCalib1; // Data Strobe Calibration
+ uint32_t DQSCtl; // DQS Control
+ uint32_t Throttle; // DDR Throttle
+ uint16_t DDRSizeMB; // Total DDR size in MegaBytes (512MB - 16384MB; fits uint16_t).
+ uint8_t Chips; // Type of DDR chips
+ uint8_t CAS; // CAS Latency (3, 4, or 5)
+ }
+ BGP_Personality_DDR_t;
+
+
+typedef struct BGP_Personality_Networks_t // Node coordinates, partition (block) and PSet information
+ {
+ uint32_t BlockID; // a.k.a. PartitionID
+
+ uint8_t Xnodes, // torus dimensions and this node's coordinates
+ Ynodes,
+ Znodes,
+ Xcoord,
+ Ycoord,
+ Zcoord;
+
+ // PSet Support
+ uint16_t PSetNum;
+ uint32_t PSetSize;
+ uint32_t RankInPSet;
+
+ uint32_t IOnodes;
+ uint32_t Rank; // Rank in Block (or Partition)
+ uint32_t IOnodeRank; // Rank (and therefore P2P Addr) of my I/O Node
+ uint16_t TreeRoutes[ 16 ]; // NOTE(review): presumably one tree/collective route descriptor per class — confirm
+ }
+ BGP_Personality_Networks_t;
+
+
+typedef struct BGP_IP_Addr_t // 16-byte address holder shared by IPv6 and IPv4 users
+ {
+ // IPv6 Addresses are 16 bytes, where the
+ // lower 4 (indices 12-15) can be used for IPv4 address.
+ uint8_t octet[ 16 ]; // NOTE(review): assumed network byte order — confirm against users
+ }
+ BGP_IP_Addr_t;
+
+
+typedef struct BGP_Personality_Ethernet_t // Ethernet/IP/NFS boot configuration for this node
+ {
+ uint16_t MTU; // Initial emac MTU size (initializers below use 1536)
+ uint8_t EmacID[6]; // MAC address for emac
+ BGP_IP_Addr_t IPAddress; // IPv6/IPv4 address of this node
+ BGP_IP_Addr_t IPNetmask; // IPv6/IPv4 netmask
+ BGP_IP_Addr_t IPBroadcast; // IPv6/IPv4 broadcast address
+ BGP_IP_Addr_t IPGateway; // IPv6/IPv4 initial gateway (zero if none)
+ BGP_IP_Addr_t NFSServer; // IPv6/IPv4 NFS system software server address
+ BGP_IP_Addr_t serviceNode; // IPv6/IPv4 address of service node
+
+ // NFS mount info
+ char NFSExportDir[BGP_PERSONALITY_LEN_NFSDIR];
+ char NFSMountDir[BGP_PERSONALITY_LEN_NFSDIR];
+
+ // Security Key for Service Node authentication
+ uint8_t SecurityKey[BGP_PERSONALITY_LEN_SECKEY ];
+ }
+ BGP_Personality_Ethernet_t;
+
+
+
+#define BGP_PERS_BLKCFG_IPOverCollective _BN(31)
+#define BGP_PERS_BLKCFG_IPOverTorus _BN(30)
+#define BGP_PERS_BLKCFG_IPOverCollectiveVC _BN(29)
+#define BGP_PERS_BLKCFG_CIOModeSel(x) _B2(28,x)
+#define BGP_PERS_BLKCFG_bgsysFSSel(x) _B3(26,x)
+#define BGP_PERS_BLKCFG_CIOMode_Full 0
+#define BGP_PERS_BLKCFG_CIOMode_MuxOnly 1
+#define BGP_PERS_BLKCFG_CIOMode_None 2
+#define BGP_PERS_BLKCFG_bgsys_NFSv3 0
+#define BGP_PERS_BLKCFG_bgsys_NFSv4 1
+#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \
+ BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3))
+
+typedef struct TBGP_Personality_t // Top-level personality image: header fields + per-subsystem sections
+ {
+ uint16_t CRC; // NOTE(review): CRC algorithm/coverage defined elsewhere; initializers below set it to 0
+ uint8_t Version;
+ uint8_t PersonalitySizeWords; // size of this struct in 32-bit words (see initializers below)
+
+ BGP_Personality_Kernel_t Kernel_Config;
+
+ BGP_Personality_DDR_t DDR_Config;
+
+ BGP_Personality_Networks_t Network_Config;
+
+ BGP_Personality_Ethernet_t Ethernet_Config;
+
+ uint8_t Block_Config; // BGP_PERS_BLKCFG_* bits above
+ uint8_t padd[7]; // Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr)
+ // to simplify jtag operations. See issue #140.
+ }
+ BGP_Personality_t;
+
+
+// Define a static initializer for default configuration. (DEFAULTS FOR SIMULATION)
+// This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c
+#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \
+ 0, /* CRC */ \
+ BGP_PERSONALITY_VERSION, /* Version */ \
+ (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+ { /* BGP_Personality_Kernel_t: */ \
+ 0, /* UniversalComponentIdentifier */ \
+ BGP_DEFAULT_FREQ, /* FreqMHz */ \
+ BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \
+ BGP_PERS_PROCESSCONFIG_DEFAULT, /* ProcessConfig */ \
+ BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \
+ BGP_PERS_NODECONFIG_DEFAULT, /* NodeConfig */ \
+ BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \
+ BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \
+ BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \
+ BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \
+ 0, /* SharedMemMB */ \
+ 0, /* ClockStop0 */ \
+ 0 /* ClockStop1 */ \
+ }, \
+ { /* BGP_Personality_DDR_t: */ \
+ BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \
+ BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \
+ BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \
+ BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \
+ BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \
+ BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \
+ BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \
+ BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \
+ BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \
+ BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \
+ BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \
+ BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \
+ BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \
+ BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \
+ BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \
+ BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \
+ BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \
+ BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \
+ BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+ BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \
+ BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \
+ BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \
+ BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \
+ BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \
+ }, \
+ { /* BGP_Personality_Networks_t: */ \
+ 0, /* BlockID */ \
+ 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \
+ 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \
+ 0, /* PSetNum */ \
+ 0, /* PSetSize */ \
+ 0, /* RankInPSet */ \
+ 0, /* IOnodes */ \
+ 0, /* Rank */ \
+ 0, /* IOnodeRank */ \
+ { 0, } /* TreeRoutes[ 16 ] */ \
+ }, \
+ { /* BGP_Personality_Ethernet_t: */ \
+ 1536, /* MTU */ \
+ { 0, }, /* EmacID[6] */ \
+ { { 0x00,0x00,0x00,0x00, /* IPAddress */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0xFF,0xFF,0xFF,0x70 /* NOTE(review): 0x70 = non-contiguous mask 255.255.255.112; 0xF0 (/28) intended? confirm */ \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPGateway */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* NFSServer */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* serviceNode */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ "", /* NFSExportDir[32] */ \
+ "", /* NFSMountDir[32] */ \
+ { 0x00, } /* SecurityKey[32] */ \
+ }, \
+ 0, /* Block_Config */ \
+ { 0, } /* padd[7] */ \
+ }
+
+
+// Define a static initializer for default configuration. (DEFAULTS FOR HARDWARE)
+// This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c
+#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \
+ 0, /* CRC */ \
+ BGP_PERSONALITY_VERSION, /* Version */ \
+ (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+ { /* BGP_Personality_Kernel_t: */ \
+ 0, /* UniversalComponentIdentifier */ \
+ BGP_DEFAULT_FREQ, /* FreqMHz */ \
+ BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \
+ BGP_PERS_PROCESSCONFIG_SMP, /* ProcessConfig */ \
+ BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \
+ BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE, /* NodeConfig */ \
+ BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \
+ BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \
+ BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \
+ BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \
+ 0, /* SharedMemMB */ \
+ 0, /* ClockStop0 */ \
+ 0 /* ClockStop1 */ \
+ }, \
+ { /* BGP_Personality_DDR_t: */ \
+ BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \
+ BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \
+ BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \
+ BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \
+ BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \
+ BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \
+ BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \
+ BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \
+ BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \
+ BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \
+ BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \
+ BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \
+ BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \
+ BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \
+ BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \
+ BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \
+ BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \
+ BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \
+ BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+ BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \
+ BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \
+ BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \
+ BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \
+ BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \
+ }, \
+ { /* BGP_Personality_Networks_t: */ \
+ 0, /* BlockID */ \
+ 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \
+ 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \
+ 0, /* PSetNum */ \
+ 0, /* PSetSize */ \
+ 0, /* RankInPSet */ \
+ 0, /* IOnodes */ \
+ 0, /* Rank */ \
+ 0, /* IOnodeRank */ \
+ { 0, } /* TreeRoutes[ 16 ] */ \
+ }, \
+ { /* BGP_Personality_Ethernet_t: */ \
+ 1536, /* MTU */ \
+ { 0, }, /* EmacID[6] */ \
+ { { 0x00,0x00,0x00,0x00, /* IPAddress */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0xFF,0xFF,0xFF,0x70 /* NOTE(review): 0x70 = non-contiguous mask 255.255.255.112; 0xF0 (/28) intended? confirm */ \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPGateway */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* NFSServer */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* serviceNode */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ "", /* NFSExportDir[32] */ \
+ "", /* NFSMountDir[32] */ \
+ { 0x00, } /* SecurityKey[32] */ \
+ }, \
+ 0, /* Block_Config */ \
+ { 0, } /* padd[7] */ \
+ }
+
+
+
+
+#endif // Add nothing below this line.
diff --git a/arch/powerpc/boot/dts/bgp.dts b/arch/powerpc/boot/dts/bgp.dts
new file mode 100644
index 00000000000000..855a00808fa15b
--- /dev/null
+++ b/arch/powerpc/boot/dts/bgp.dts
@@ -0,0 +1,127 @@
+/*
+ * Device Tree Source for IBM BlueGene/P
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+
+ * Josh Boyer <jwboyer@linux.vnet.ibm.com>, David Gibson <dwg@au1.ibm.com>
+ *
+ * Cloned from 'Ebony', and revised.
+ *
+ */
+
+/dts-v1/;
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ model = "ibm,bluegenep";
+ compatible = "ibm,bluegenep";
+ dcr-parent = <&{/cpus/cpu@0}>;
+
+/* aliases {
+ ethernet0 = &EMAC0;
+ };
+*/
+ cpus { /* four PowerPC 450 cores; each cpu's reg must equal its unit address and be unique */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ model = "PowerPC,450";
+ reg = <0x00000000>;
+ clock-frequency = <850000000>;
+ timebase-frequency = <850000000>;
+ i-cache-line-size = <32>;
+ d-cache-line-size = <32>;
+ i-cache-size = <32768>; /* 32 kB */
+ d-cache-size = <32768>; /* 32 kB */
+ dcr-controller;
+ dcr-access-method = "native";
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ model = "PowerPC,450";
+ reg = <0x00000001>; /* fixed: was 0, duplicating cpu@0 */
+ clock-frequency = <850000000>;
+ timebase-frequency = <850000000>;
+ i-cache-line-size = <32>;
+ d-cache-line-size = <32>;
+ i-cache-size = <32768>; /* 32 kB */
+ d-cache-size = <32768>; /* 32 kB */
+ dcr-controller;
+ dcr-access-method = "native";
+ };
+
+ cpu@2 {
+ device_type = "cpu";
+ model = "PowerPC,450";
+ reg = <0x00000002>; /* fixed: was 0, duplicating cpu@0 */
+ clock-frequency = <850000000>;
+ timebase-frequency = <850000000>;
+ i-cache-line-size = <32>;
+ d-cache-line-size = <32>;
+ i-cache-size = <32768>; /* 32 kB */
+ d-cache-size = <32768>; /* 32 kB */
+ dcr-controller;
+ dcr-access-method = "native";
+ };
+
+ cpu@3 {
+ device_type = "cpu";
+ model = "PowerPC,450";
+ reg = <0x00000003>; /* fixed: was 0, duplicating cpu@0 */
+ clock-frequency = <850000000>;
+ timebase-frequency = <850000000>;
+ i-cache-line-size = <32>;
+ d-cache-line-size = <32>;
+ i-cache-size = <32768>; /* 32 kB */
+ d-cache-size = <32768>; /* 32 kB */
+ dcr-controller;
+ dcr-access-method = "native";
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x00000000 0x00000000>; // from wrapper
+ };
+
+ ibm,bluegene {
+ cns { // from wrapper
+ version = <0>;
+ size = <0>;
+ base-va = <0>;
+ base-pa = <0>; // assume <= 4G
+ services = <0>;
+ };
+ personality { // from wrapper
+ version = <0>;
+ frequency = <850000000>;
+ };
+ };
+
+ chosen {
+ bootargs = "console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608 rdinit=/sbin/init";
+
+ // the bgp wrapper locates a ramdisk and updates initrd-start/end
+ linux,initrd-start = <0>;
+ linux,initrd-end = <0>;
+ };
+};
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 965c237c122d78..23c6ccfdb5fbf5 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -222,6 +222,11 @@ asp834x-redboot)
platformo="$object/fixed-head.o $object/redboot-83xx.o"
binary=y
;;
+bgp)
+ platformo="--section-start bgstart=0 $object/fixed-head.o $object/bgp.o"
+ link_address='0x00800000'
+ ;;
+
esac
vmz="$tmpdir/`basename \"$kernel\"`.$ext"
diff --git a/arch/powerpc/configs/44x/bgp_defconfig b/arch/powerpc/configs/44x/bgp_defconfig
new file mode 100644
index 00000000000000..b90cc818cdf64f
--- /dev/null
+++ b/arch/powerpc/configs/44x/bgp_defconfig
@@ -0,0 +1,929 @@
+#
+# Automatically generated make config: don't edit
+# Linux kernel version: 2.6.29.1
+# Wed May 6 13:09:35 2009
+#
+# CONFIG_PPC64 is not set
+
+#
+# Processor support
+#
+# CONFIG_6xx is not set
+# CONFIG_PPC_85xx is not set
+# CONFIG_PPC_8xx is not set
+# CONFIG_40x is not set
+CONFIG_44x=y
+# CONFIG_E200 is not set
+CONFIG_PPC_FPU=y
+CONFIG_4xx=y
+CONFIG_BOOKE=y
+CONFIG_PTE_64BIT=y
+CONFIG_PHYS_64BIT=y
+CONFIG_PPC_MMU_NOHASH=y
+# CONFIG_PPC_MM_SLICES is not set
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+# CONFIG_NOT_COHERENT_CACHE is not set
+CONFIG_L1_WRITETHROUGH=y
+CONFIG_PPC32=y
+CONFIG_WORD_SIZE=32
+CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
+CONFIG_MMU=y
+CONFIG_GENERIC_CMOS_UPDATE=y
+CONFIG_GENERIC_TIME=y
+CONFIG_GENERIC_TIME_VSYSCALL=y
+CONFIG_GENERIC_CLOCKEVENTS=y
+CONFIG_GENERIC_HARDIRQS=y
+# CONFIG_HAVE_SETUP_PER_CPU_AREA is not set
+CONFIG_IRQ_PER_CPU=y
+CONFIG_STACKTRACE_SUPPORT=y
+CONFIG_HAVE_LATENCYTOP_SUPPORT=y
+CONFIG_LOCKDEP_SUPPORT=y
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
+CONFIG_ARCH_HAS_ILOG2_U32=y
+CONFIG_GENERIC_HWEIGHT=y
+CONFIG_GENERIC_CALIBRATE_DELAY=y
+CONFIG_GENERIC_FIND_NEXT_BIT=y
+# CONFIG_ARCH_NO_VIRT_TO_BUS is not set
+CONFIG_PPC=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GENERIC_NVRAM=y
+CONFIG_SCHED_OMIT_FRAME_POINTER=y
+CONFIG_ARCH_MAY_HAVE_PC_FDC=y
+CONFIG_PPC_OF=y
+CONFIG_OF=y
+CONFIG_PPC_UDBG_16550=y
+CONFIG_GENERIC_TBSYNC=y
+CONFIG_AUDIT_ARCH=y
+CONFIG_GENERIC_BUG=y
+# CONFIG_DEFAULT_UIMAGE is not set
+CONFIG_PPC_DCR_NATIVE=y
+# CONFIG_PPC_DCR_MMIO is not set
+CONFIG_PPC_DCR=y
+CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
+
+#
+# General setup
+#
+CONFIG_EXPERIMENTAL=y
+CONFIG_LOCK_KERNEL=y
+CONFIG_INIT_ENV_ARG_LIMIT=32
+CONFIG_LOCALVERSION=""
+CONFIG_LOCALVERSION_AUTO=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_SYSVIPC_SYSCTL=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_BSD_PROCESS_ACCT is not set
+# CONFIG_TASKSTATS is not set
+# CONFIG_AUDIT is not set
+
+#
+# RCU Subsystem
+#
+CONFIG_CLASSIC_RCU=y
+# CONFIG_TREE_RCU is not set
+# CONFIG_PREEMPT_RCU is not set
+# CONFIG_TREE_RCU_TRACE is not set
+# CONFIG_PREEMPT_RCU_TRACE is not set
+# CONFIG_IKCONFIG is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_GROUP_SCHED=y
+CONFIG_FAIR_GROUP_SCHED=y
+# CONFIG_RT_GROUP_SCHED is not set
+CONFIG_USER_SCHED=y
+# CONFIG_CGROUP_SCHED is not set
+# CONFIG_CGROUPS is not set
+CONFIG_SYSFS_DEPRECATED=y
+CONFIG_SYSFS_DEPRECATED_V2=y
+CONFIG_RELAY=y
+# CONFIG_NAMESPACES is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
+CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
+CONFIG_EMBEDDED=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_KALLSYMS=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KALLSYMS_EXTRA_PASS=y
+CONFIG_HOTPLUG=y
+CONFIG_PRINTK=y
+CONFIG_BUG=y
+CONFIG_ELF_CORE=y
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_SIGNALFD=y
+CONFIG_TIMERFD=y
+CONFIG_EVENTFD=y
+CONFIG_SHMEM=y
+CONFIG_AIO=y
+CONFIG_VM_EVENT_COUNTERS=y
+CONFIG_SLUB_DEBUG=y
+CONFIG_COMPAT_BRK=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
+# CONFIG_SLOB is not set
+CONFIG_PROFILING=y
+CONFIG_OPROFILE=y
+CONFIG_HAVE_OPROFILE=y
+# CONFIG_KPROBES is not set
+CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
+CONFIG_HAVE_IOREMAP_PROT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_USE_GENERIC_SMP_HELPERS=y
+# CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
+CONFIG_SLABINFO=y
+CONFIG_RT_MUTEXES=y
+CONFIG_BASE_SMALL=0
+CONFIG_MODULES=y
+# CONFIG_MODULE_FORCE_LOAD is not set
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_MODULE_FORCE_UNLOAD is not set
+# CONFIG_MODVERSIONS is not set
+# CONFIG_MODULE_SRCVERSION_ALL is not set
+CONFIG_STOP_MACHINE=y
+CONFIG_BLOCK=y
+CONFIG_LBD=y
+# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_BLK_DEV_INTEGRITY is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+# CONFIG_FREEZER is not set
+
+#
+# Platform support
+#
+# CONFIG_PPC_CELL is not set
+# CONFIG_PPC_CELL_NATIVE is not set
+# CONFIG_PQ2ADS is not set
+# CONFIG_BAMBOO is not set
+# CONFIG_EBONY is not set
+# CONFIG_SAM440EP is not set
+# CONFIG_SEQUOIA is not set
+# CONFIG_TAISHAN is not set
+# CONFIG_KATMAI is not set
+# CONFIG_RAINIER is not set
+# CONFIG_WARP is not set
+# CONFIG_CANYONLANDS is not set
+# CONFIG_YOSEMITE is not set
+CONFIG_BGP=y
+# CONFIG_XILINX_VIRTEX440_GENERIC_BOARD is not set
+CONFIG_BLUEGENE=y
+# CONFIG_BLUEGENE_MAMBO is not set
+# CONFIG_BGP_DD1 is not set
+CONFIG_BLUEGENE_TCP=y
+# CONFIG_BLUEGENE_DMA_MEMCPY is not set
+CONFIG_BLUEGENE_COLLECTIVE_TRACE=y
+CONFIG_BLUEGENE_TORUS_TRACE=y
+
+CONFIG_BGP_STATISTICS=y
+# CONFIG_BLUEGENE_SHARE_WITH_VRNIC is not set
+# CONFIG_BLUEGENE_TCP_WITHOUT_NAPI is not set
+# CONFIG_BLUEGENE_UNIPROCESSOR is not set
+# CONFIG_BLUEGENE_SOCKETS is not set
+CONFIG_HUGE_KMALLOC=y
+CONFIG_DEBUG_ALIGNMENT_HISTOGRAM=y
+# CONFIG_DEBUG_STACK_USAGE is not set
+CONFIG_IBM_OCP=y
+CONFIG_IBM_EMAC4=y
+# CONFIG_PPC4xx_DMA is not set
+CONFIG_PPC_GEN550=y
+# CONFIG_IPIC is not set
+# CONFIG_MPIC is not set
+# CONFIG_MPIC_WEIRD is not set
+# CONFIG_PPC_I8259 is not set
+# CONFIG_PPC_RTAS is not set
+# CONFIG_MMIO_NVRAM is not set
+# CONFIG_PPC_MPC106 is not set
+# CONFIG_PPC_970_NAP is not set
+# CONFIG_PPC_INDIRECT_IO is not set
+# CONFIG_GENERIC_IOMAP is not set
+# CONFIG_CPU_FREQ is not set
+# CONFIG_FSL_ULI1575 is not set
+# CONFIG_SIMPLE_GPIO is not set
+
+#
+# Kernel options
+#
+CONFIG_HIGHMEM=y
+# CONFIG_NO_HZ is not set
+# CONFIG_HIGH_RES_TIMERS is not set
+CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
+CONFIG_HZ_100=y
+# CONFIG_HZ_250 is not set
+# CONFIG_HZ_300 is not set
+# CONFIG_HZ_1000 is not set
+CONFIG_HZ=100
+# CONFIG_SCHED_HRTICK is not set
+CONFIG_PREEMPT_NONE=y
+# CONFIG_PREEMPT_VOLUNTARY is not set
+# CONFIG_PREEMPT is not set
+CONFIG_BINFMT_ELF=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+# CONFIG_HAVE_AOUT is not set
+# CONFIG_BINFMT_MISC is not set
+CONFIG_MATH_EMULATION=y
+# CONFIG_IOMMU_HELPER is not set
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
+CONFIG_ARCH_HAS_WALK_MEMORY=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
+# CONFIG_IRQ_ALL_CPUS is not set
+CONFIG_ARCH_FLATMEM_ENABLE=y
+CONFIG_ARCH_POPULATES_NODE_MAP=y
+CONFIG_SELECT_MEMORY_MODEL=y
+CONFIG_FLATMEM_MANUAL=y
+# CONFIG_DISCONTIGMEM_MANUAL is not set
+# CONFIG_SPARSEMEM_MANUAL is not set
+CONFIG_FLATMEM=y
+CONFIG_FLAT_NODE_MEM_MAP=y
+CONFIG_PAGEFLAGS_EXTENDED=y
+CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
+CONFIG_PHYS_ADDR_T_64BIT=y
+CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
+CONFIG_UNEVICTABLE_LRU=y
+# CONFIG_PPC_4K_PAGES is not set
+# CONFIG_PPC_16K_PAGES is not set
+CONFIG_PPC_64K_PAGES=y
+CONFIG_FORCE_MAX_ZONEORDER=11
+CONFIG_PROC_DEVICETREE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=bgcons root=/dev/ram0 lpj=8500000 profile=2 log_buf_len=8388608"
+CONFIG_WRAP_COPY_TOFROM_USER=y
+CONFIG_EXTRA_TARGETS=""
+CONFIG_SECCOMP=y
+CONFIG_ISA_DMA_API=y
+
+#
+# Bus options
+#
+CONFIG_ZONE_DMA=y
+CONFIG_4xx_SOC=y
+CONFIG_PPC_PCI_CHOICE=y
+# CONFIG_PCI is not set
+# CONFIG_PCI_DOMAINS is not set
+# CONFIG_PCI_SYSCALL is not set
+# CONFIG_ARCH_SUPPORTS_MSI is not set
+# CONFIG_PCCARD is not set
+# CONFIG_HAS_RAPIDIO is not set
+
+#
+# Advanced setup
+#
+# CONFIG_ADVANCED_OPTIONS is not set
+
+#
+# Default settings for advanced configuration options are used
+#
+CONFIG_LOWMEM_SIZE=0x30000000
+CONFIG_PAGE_OFFSET=0xc0000000
+CONFIG_KERNEL_START=0xc0000000
+CONFIG_PHYSICAL_START=0x00000000
+CONFIG_TASK_SIZE=0xc0000000
+CONFIG_NET=y
+
+#
+# Networking options
+#
+CONFIG_COMPAT_NET_DEV_OPS=y
+CONFIG_PACKET=y
+# CONFIG_PACKET_MMAP is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+CONFIG_IP_FIB_HASH=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IP_PNP_RARP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_INET_XFRM_TUNNEL is not set
+CONFIG_INET_TUNNEL=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+CONFIG_INET_DIAG=y
+CONFIG_INET_TCP_DIAG=y
+# CONFIG_TCP_CONG_ADVANCED is not set
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_DEFAULT_TCP_CONG="cubic"
+# CONFIG_TCP_MD5SIG is not set
+CONFIG_IPV6=y
+# CONFIG_IPV6_PRIVACY is not set
+# CONFIG_IPV6_ROUTER_PREF is not set
+# CONFIG_IPV6_OPTIMISTIC_DAD is not set
+# CONFIG_INET6_AH is not set
+# CONFIG_INET6_ESP is not set
+# CONFIG_INET6_IPCOMP is not set
+# CONFIG_IPV6_MIP6 is not set
+# CONFIG_INET6_XFRM_TUNNEL is not set
+# CONFIG_INET6_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET6_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET6_XFRM_MODE_BEET is not set
+# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_NDISC_NODETYPE=y
+# CONFIG_IPV6_TUNNEL is not set
+# CONFIG_IPV6_MULTIPLE_TABLES is not set
+# CONFIG_IPV6_MROUTE is not set
+# CONFIG_NETWORK_SECMARK is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_IP_DCCP is not set
+# CONFIG_IP_SCTP is not set
+# CONFIG_TIPC is not set
+# CONFIG_ATM is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NET_DSA is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_DECNET is not set
+# CONFIG_LLC2 is not set
+# CONFIG_IPX is not set
+# CONFIG_ATALK is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_SCHED is not set
+# CONFIG_DCB is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+# CONFIG_HAMRADIO is not set
+# CONFIG_CAN is not set
+# CONFIG_IRDA is not set
+# CONFIG_BT is not set
+# CONFIG_AF_RXRPC is not set
+# CONFIG_PHONET is not set
+# CONFIG_WIRELESS is not set
+# CONFIG_WIMAX is not set
+# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
+
+#
+# Device Drivers
+#
+
+#
+# Generic Driver Options
+#
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_STANDALONE=y
+CONFIG_PREVENT_FIRMWARE_BUILD=y
+CONFIG_FW_LOADER=y
+CONFIG_FIRMWARE_IN_KERNEL=y
+CONFIG_EXTRA_FIRMWARE=""
+# CONFIG_DEBUG_DRIVER is not set
+# CONFIG_DEBUG_DEVRES is not set
+# CONFIG_SYS_HYPERVISOR is not set
+CONFIG_CONNECTOR=y
+CONFIG_PROC_EVENTS=y
+# CONFIG_MTD is not set
+CONFIG_OF_DEVICE=y
+# CONFIG_PARPORT is not set
+CONFIG_BLK_DEV=y
+# CONFIG_BLK_DEV_FD is not set
+# CONFIG_BLK_DEV_COW_COMMON is not set
+# CONFIG_BLK_DEV_LOOP is not set
+# CONFIG_BLK_DEV_NBD is not set
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_COUNT=16
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_BLK_DEV_XIP is not set
+# CONFIG_CDROM_PKTCDVD is not set
+# CONFIG_ATA_OVER_ETH is not set
+# CONFIG_XILINX_SYSACE is not set
+# CONFIG_BLK_DEV_HD is not set
+CONFIG_MISC_DEVICES=y
+# CONFIG_ENCLOSURE_SERVICES is not set
+# CONFIG_C2PORT is not set
+
+#
+# EEPROM support
+#
+# CONFIG_EEPROM_93CX6 is not set
+CONFIG_HAVE_IDE=y
+# CONFIG_IDE is not set
+
+#
+# SCSI device support
+#
+# CONFIG_RAID_ATTRS is not set
+# CONFIG_SCSI is not set
+# CONFIG_SCSI_DMA is not set
+# CONFIG_SCSI_NETLINK is not set
+# CONFIG_ATA is not set
+# CONFIG_MD is not set
+# CONFIG_MACINTOSH_DRIVERS is not set
+CONFIG_NETDEVICES=y
+# CONFIG_DUMMY is not set
+# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
+# CONFIG_EQUALIZER is not set
+# CONFIG_TUN is not set
+# CONFIG_VETH is not set
+# CONFIG_NET_ETHERNET is not set
+# CONFIG_NETDEV_1000 is not set
+CONFIG_NETDEV_10000=y
+CONFIG_BGP_COLLECTIVE=y
+CONFIG_BGP_COLLECTIVE_IP_CHECKSUM=y
+# CONFIG_BGP_COLLECTIVE_NAPI is not set
+CONFIG_BGP_DMA=y
+CONFIG_BGP_TORUS=y
+CONFIG_BGP_TORUS_DIAGNOSTICS=y
+# CONFIG_BGP_FRANKENTORUS is not set
+CONFIG_BGP_TORUS_IP_CHECKSUM=y
+CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT=20
+CONFIG_BGP_TORUS_ADAPTIVE_ROUTING=y
+
+# CONFIG_BGP_VRNIC is not set
+CONFIG_BGP_E10000=y
+CONFIG_BGP_E10000_RXB=1048576
+CONFIG_BGP_E10000_TXB=4096
+CONFIG_BGP_E10000_IP_CHECKSUM=y
+CONFIG_BGP_E10000_NAPI=y
+# CONFIG_BGP_E10000_EMAC_LOOPBACK is not set
+# CONFIG_BGP_E10000_PHY_LOOPBACK is not set
+# CONFIG_BGP_E10000_DBG is not set
+
+#
+# Wireless LAN
+#
+# CONFIG_WLAN_PRE80211 is not set
+# CONFIG_WLAN_80211 is not set
+# CONFIG_IWLWIFI_LEDS is not set
+
+#
+# Enable WiMAX (Networking options) to see the WiMAX drivers
+#
+# CONFIG_WAN is not set
+# CONFIG_PPP is not set
+# CONFIG_SLIP is not set
+# CONFIG_NETCONSOLE is not set
+# CONFIG_NETPOLL is not set
+# CONFIG_NET_POLL_CONTROLLER is not set
+
+CONFIG_TCP_HIATUS_COUNTS=y
+CONFIG_TCP_CONGESTION_OVERRIDES=y
+
+# CONFIG_ISDN is not set
+# CONFIG_PHONE is not set
+
+#
+# Input device support
+#
+# CONFIG_INPUT is not set
+
+#
+# Hardware I/O ports
+#
+# CONFIG_SERIO is not set
+# CONFIG_GAMEPORT is not set
+
+#
+# Character devices
+#
+# CONFIG_VT is not set
+CONFIG_DEVKMEM=y
+# CONFIG_SERIAL_NONSTANDARD is not set
+
+#
+# Serial drivers
+#
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=4
+CONFIG_SERIAL_8250_RUNTIME_UARTS=4
+CONFIG_SERIAL_8250_EXTENDED=y
+# CONFIG_SERIAL_8250_MANY_PORTS is not set
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+# CONFIG_SERIAL_8250_DETECT_IRQ is not set
+# CONFIG_SERIAL_8250_RSA is not set
+
+#
+# Non-8250 serial port support
+#
+# CONFIG_SERIAL_UARTLITE is not set
+CONFIG_SERIAL_CORE=y
+CONFIG_SERIAL_CORE_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL is not set
+CONFIG_UNIX98_PTYS=y
+# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_HVC_UDBG is not set
+# CONFIG_IPMI_HANDLER is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_NVRAM is not set
+# CONFIG_GEN_RTC is not set
+# CONFIG_R3964 is not set
+# CONFIG_RAW_DRIVER is not set
+# CONFIG_TCG_TPM is not set
+# CONFIG_I2C is not set
+# CONFIG_SPI is not set
+CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y
+# CONFIG_GPIOLIB is not set
+# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+# CONFIG_WATCHDOG is not set
+CONFIG_SSB_POSSIBLE=y
+
+#
+# Sonics Silicon Backplane
+#
+# CONFIG_SSB is not set
+
+#
+# Multifunction device drivers
+#
+# CONFIG_MFD_CORE is not set
+# CONFIG_MFD_SM501 is not set
+# CONFIG_HTC_PASIC3 is not set
+# CONFIG_MFD_TMIO is not set
+# CONFIG_REGULATOR is not set
+
+#
+# Multimedia devices
+#
+
+#
+# Multimedia core support
+#
+# CONFIG_VIDEO_DEV is not set
+# CONFIG_DVB_CORE is not set
+# CONFIG_VIDEO_MEDIA is not set
+
+#
+# Multimedia drivers
+#
+# CONFIG_DAB is not set
+
+#
+# Graphics support
+#
+# CONFIG_VGASTATE is not set
+# CONFIG_VIDEO_OUTPUT_CONTROL is not set
+# CONFIG_FB is not set
+# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
+
+#
+# Display device support
+#
+# CONFIG_DISPLAY_SUPPORT is not set
+# CONFIG_SOUND is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_MMC is not set
+# CONFIG_MEMSTICK is not set
+# CONFIG_NEW_LEDS is not set
+# CONFIG_ACCESSIBILITY is not set
+
+#
+# InfiniBand support
+#
+CONFIG_INFINIBAND=y
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_USER_MEM=y
+CONFIG_INFINIBAND_ADDR_TRANS=y
+CONFIG_INFINIBAND_SOFTRDMA=m
+CONFIG_INFINIBAND_SOFTIWARP=m
+# CONFIG_INFINIBAND_BGVRNIC is not set
+# CONFIG_INFINIBAND_BGVRNIC_ETH is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_CM is not set
+CONFIG_INFINIBAND_IPOIB_DEBUG=y
+CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
+# CONFIG_EDAC is not set
+# CONFIG_RTC_CLASS is not set
+# CONFIG_DMADEVICES is not set
+# CONFIG_UIO is not set
+# CONFIG_STAGING is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+# CONFIG_EXT2_FS_XIP is not set
+# CONFIG_EXT3_FS is not set
+# CONFIG_EXT4_FS is not set
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_FS_POSIX_ACL is not set
+CONFIG_FILE_LOCKING=y
+# CONFIG_XFS_FS is not set
+# CONFIG_GFS2_FS is not set
+# CONFIG_OCFS2_FS is not set
+# CONFIG_BTRFS_FS is not set
+CONFIG_DNOTIFY=y
+CONFIG_INOTIFY=y
+CONFIG_INOTIFY_USER=y
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+CONFIG_AUTOFS4_FS=y
+# CONFIG_FUSE_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_MSDOS_FS is not set
+# CONFIG_VFAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_PROC_PAGE_MONITOR=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+# CONFIG_TMPFS_POSIX_ACL is not set
+# CONFIG_HUGETLBFS is not set
+# CONFIG_HUGETLB_PAGE is not set
+# CONFIG_CONFIGFS_FS is not set
+CONFIG_MISC_FILESYSTEMS=y
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_HFSPLUS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+CONFIG_CRAMFS=y
+# CONFIG_SQUASHFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_OMFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3=y
+# CONFIG_NFS_V3_ACL is not set
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3=y
+# CONFIG_NFSD_V3_ACL is not set
+# CONFIG_NFSD_V4 is not set
+CONFIG_LOCKD=y
+CONFIG_LOCKD_V4=y
+CONFIG_NFS_COMMON=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+# CONFIG_SUNRPC_REGISTER_V4 is not set
+CONFIG_RPCSEC_GSS_KRB5=y
+# CONFIG_RPCSEC_GSS_SPKM3 is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+# CONFIG_NLS is not set
+# CONFIG_DLM is not set
+
+#
+# Library routines
+#
+CONFIG_BITREVERSE=y
+CONFIG_GENERIC_FIND_LAST_BIT=y
+# CONFIG_CRC_CCITT is not set
+# CONFIG_CRC16 is not set
+# CONFIG_CRC_T10DIF is not set
+# CONFIG_CRC_ITU_T is not set
+CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
+# CONFIG_LIBCRC32C is not set
+CONFIG_ZLIB_INFLATE=y
+CONFIG_PLIST=y
+CONFIG_HAS_IOMEM=y
+CONFIG_HAS_IOPORT=y
+CONFIG_HAS_DMA=y
+CONFIG_HAVE_LMB=y
+
+#
+# Kernel hacking
+#
+CONFIG_PRINTK_TIME=y
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_WARN=1024
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_UNUSED_SYMBOLS is not set
+CONFIG_DEBUG_FS=y
+# CONFIG_HEADERS_CHECK is not set
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_KERNEL=y
+# CONFIG_DEBUG_SHIRQ is not set
+CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_SCHEDSTATS is not set
+# CONFIG_TIMER_STATS is not set
+# CONFIG_DEBUG_OBJECTS is not set
+# CONFIG_SLUB_DEBUG_ON is not set
+# CONFIG_SLUB_STATS is not set
+# CONFIG_DEBUG_RT_MUTEXES is not set
+# CONFIG_RT_MUTEX_TESTER is not set
+# CONFIG_DEBUG_SPINLOCK is not set
+# CONFIG_DEBUG_MUTEXES is not set
+# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
+# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+# CONFIG_DEBUG_KOBJECT is not set
+# CONFIG_DEBUG_HIGHMEM is not set
+CONFIG_DEBUG_BUGVERBOSE=y
+# CONFIG_DEBUG_INFO is not set
+# CONFIG_DEBUG_VM is not set
+# CONFIG_DEBUG_WRITECOUNT is not set
+# CONFIG_DEBUG_MEMORY_INIT is not set
+# CONFIG_DEBUG_LIST is not set
+# CONFIG_DEBUG_SG is not set
+# CONFIG_DEBUG_NOTIFIERS is not set
+# CONFIG_BOOT_PRINTK_DELAY is not set
+# CONFIG_RCU_TORTURE_TEST is not set
+# CONFIG_RCU_CPU_STALL_DETECTOR is not set
+# CONFIG_BACKTRACE_SELF_TEST is not set
+# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set
+# CONFIG_FAULT_INJECTION is not set
+# CONFIG_LATENCYTOP is not set
+CONFIG_SYSCTL_SYSCALL_CHECK=y
+CONFIG_HAVE_FUNCTION_TRACER=y
+CONFIG_HAVE_DYNAMIC_FTRACE=y
+CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
+
+#
+# Tracers
+#
+# CONFIG_FUNCTION_TRACER is not set
+# CONFIG_SCHED_TRACER is not set
+# CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_BOOT_TRACER is not set
+# CONFIG_TRACE_BRANCH_PROFILING is not set
+# CONFIG_STACK_TRACER is not set
+# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_SAMPLES is not set
+CONFIG_HAVE_ARCH_KGDB=y
+# CONFIG_KGDB is not set
+CONFIG_PRINT_STACK_DEPTH=64
+# CONFIG_DEBUG_STACKOVERFLOW is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_CODE_PATCHING_SELFTEST is not set
+# CONFIG_FTR_FIXUP_SELFTEST is not set
+# CONFIG_MSI_BITMAP_SELFTEST is not set
+# CONFIG_XMON is not set
+# CONFIG_IRQSTACKS is not set
+# CONFIG_VIRQ_DEBUG is not set
+# CONFIG_BDI_SWITCH is not set
+# CONFIG_PPC_EARLY_DEBUG is not set
+
+#
+# Security options
+#
+# CONFIG_KEYS is not set
+# CONFIG_SECURITY is not set
+# CONFIG_SECURITYFS is not set
+# CONFIG_SECURITY_FILE_CAPABILITIES is not set
+CONFIG_CRYPTO=y
+
+#
+# Crypto core or helper
+#
+# CONFIG_CRYPTO_FIPS is not set
+CONFIG_CRYPTO_ALGAPI=y
+CONFIG_CRYPTO_ALGAPI2=y
+CONFIG_CRYPTO_AEAD2=y
+CONFIG_CRYPTO_BLKCIPHER=y
+CONFIG_CRYPTO_BLKCIPHER2=y
+CONFIG_CRYPTO_HASH=y
+CONFIG_CRYPTO_HASH2=y
+CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_MANAGER=y
+CONFIG_CRYPTO_MANAGER2=y
+# CONFIG_CRYPTO_GF128MUL is not set
+# CONFIG_CRYPTO_NULL is not set
+# CONFIG_CRYPTO_CRYPTD is not set
+# CONFIG_CRYPTO_AUTHENC is not set
+# CONFIG_CRYPTO_TEST is not set
+
+#
+# Authenticated Encryption with Associated Data
+#
+# CONFIG_CRYPTO_CCM is not set
+# CONFIG_CRYPTO_GCM is not set
+# CONFIG_CRYPTO_SEQIV is not set
+
+#
+# Block modes
+#
+CONFIG_CRYPTO_CBC=y
+# CONFIG_CRYPTO_CTR is not set
+# CONFIG_CRYPTO_CTS is not set
+CONFIG_CRYPTO_ECB=y
+# CONFIG_CRYPTO_LRW is not set
+CONFIG_CRYPTO_PCBC=y
+# CONFIG_CRYPTO_XTS is not set
+
+#
+# Hash modes
+#
+# CONFIG_CRYPTO_HMAC is not set
+# CONFIG_CRYPTO_XCBC is not set
+
+#
+# Digest
+#
+# CONFIG_CRYPTO_CRC32C is not set
+# CONFIG_CRYPTO_MD4 is not set
+CONFIG_CRYPTO_MD5=y
+# CONFIG_CRYPTO_MICHAEL_MIC is not set
+# CONFIG_CRYPTO_RMD128 is not set
+# CONFIG_CRYPTO_RMD160 is not set
+# CONFIG_CRYPTO_RMD256 is not set
+# CONFIG_CRYPTO_RMD320 is not set
+# CONFIG_CRYPTO_SHA1 is not set
+# CONFIG_CRYPTO_SHA256 is not set
+# CONFIG_CRYPTO_SHA512 is not set
+# CONFIG_CRYPTO_TGR192 is not set
+# CONFIG_CRYPTO_WP512 is not set
+
+#
+# Ciphers
+#
+# CONFIG_CRYPTO_AES is not set
+# CONFIG_CRYPTO_ANUBIS is not set
+# CONFIG_CRYPTO_ARC4 is not set
+# CONFIG_CRYPTO_BLOWFISH is not set
+# CONFIG_CRYPTO_CAMELLIA is not set
+# CONFIG_CRYPTO_CAST5 is not set
+# CONFIG_CRYPTO_CAST6 is not set
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_FCRYPT is not set
+# CONFIG_CRYPTO_KHAZAD is not set
+# CONFIG_CRYPTO_SALSA20 is not set
+# CONFIG_CRYPTO_SEED is not set
+# CONFIG_CRYPTO_SERPENT is not set
+# CONFIG_CRYPTO_TEA is not set
+# CONFIG_CRYPTO_TWOFISH is not set
+
+#
+# Compression
+#
+# CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_LZO is not set
+
+#
+# Random Number Generation
+#
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_PPC_CLOCK is not set
+# CONFIG_VIRTUALIZATION is not set
diff --git a/arch/powerpc/include/asm/bgcns.h b/arch/powerpc/include/asm/bgcns.h
new file mode 100644
index 00000000000000..238ad401a3cbfb
--- /dev/null
+++ b/arch/powerpc/include/asm/bgcns.h
@@ -0,0 +1,1060 @@
+/*
+ * (C) Copyright IBM Corp. 2007, 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Tom Gooding, IBM
+ */
+
+
+#ifndef _BGCNS_H
+#define _BGCNS_H
+
+
+#ifndef __ASSEMBLY__
+
+/*! @page CNS Common Node Services
+ *
+ * @section CNS_S10 Overview
+ *
+ * As the name implies, the <b>Common Node Services (CNS)</b> layer provides @b services
+ * to the kernel. These services may be simple queries abstracting various node
+ * specific data (such as DDR size) or they may be more sophisticated software services,
+ * such as common machine check handling. Additionally, some services may be implicit,
+ * such as the initialization of various hardware devices unique to Blue Gene, such as
+ * Netbus and SerDes.
+ *
+ * Services are not directly linked into the kernel, but rather are invoked from kernel
+ * code via a <b>service directory</b> which is itself part of an overall <b>service
+ * descriptor</b>. This service descriptor is constructed during initialization and
+ * is passed to the kernel when the kernel is booted. The service directory is a
+ * collection of <b>service references</b>.
+ *
+ * During partition (block) booting, ELF images are loaded onto the compute and I/O nodes.
+ * The bootloader (@i aka microloader) boots first and then transfers control to the Common
+ * Node Services layer so that it, in turn, may boot.
+ *
+ * Once the CNS layer has booted, control is transferred to the kernel so that it may also
+ * boot. All services provided by the CNS layer are immediately available at this time.
+ *
+ * @section CNS_S20 Programming Model
+ *
+ * A kernel running on top of the CNS layer is not statically linked to the common services.
+ * Instead, the services are called via function pointers provided by the services descriptor,
+ * which is described here: @ref _BGCNS_ServiceDirectory.
+ *
+ * The kernel must operate under the following rules and restrictions:
+ * @li The kernel must not alter the services descriptor. The descriptor must be treated as a read-only
+ * data structure even though the kernel may have the ability to alter it. Because CNS trusts the
+ * kernel, this also implies that the kernel must not expose the descriptor to any untrusted
+ * software (such as application code).
+ * @li The kernel must ensure that the CNS virtual memory region is mapped prior to invoking any
+ * service.
+ * @li The kernel must ensure that any data passed to services via parameters is mapped.
+ * Specifically, TLB entries must be mapped as shared (TID = 0) and must be either readable
+ * (input parameters) or readable and writeable (output parameters).
+ * @li The kernel must treat the virtual address range (@ref _BGCNS_Descriptor::baseVirtualAddress ,
+ * _BGCNS_Descriptor::baseVirtualAddress + @ref _BGCNS_Descriptor::size - 1) as reserved.
+ * That is, the kernel must not use this region of virtual memory for anything besides accessing
+ * the services descriptor.
+ * @li The kernel must treat the physical address range (@ref _BGCNS_Descriptor::basePhysicalAddress,
+ * _BGCNS_Descriptor::basePhysicalAddress + _BGCNS_Descriptor::size - 1) as reserved. The
+ * kernel must not map this memory for any other use.
+ * @li The kernel must not access any of the reserved virtual address regions with TLB settings that
+ * are different from those used by CNS. The kernel is allowed to unmap any of the reserved
+ * memory TLBs for its own use. However, in such a case and per the rule above, the kernel must
+ * ensure that the region is mapped prior to using any CNS facilities (such as invoking a service).
+ * @li CNS may need to map one or more TLB entries in order to access Blue Gene devices. In such a case,
+ * CNS may borrow TLB entries; the TLB will be returned to its original state before the service returns
+ * control to the invoking kernel. Kernels may avoid this behavior for specific devices by using
+ * the mapDevice service.
+ * @li The kernel's ELF image must avoid the 256K region of memory between 0x07000000 and 0x0703FFFF. This
+ * region is used for the pre-relocated CNS image and will be available for general use once CNS boot
+ * is complete.
+ * @li The kernel must not alter any reserved SPRs, DCRs or memory-mapped device registers.
+ *
+ * The CNS software may behave unpredictably if any of these rules and restrictions is violated.
+ *
+ * Kernels may make the following assumptions about CNS:
+ *
+ * @li The data passed in the firmware descriptor (see below) is static. Specifically, the base addresses,
+ * size and service directory will not change once CNS boot is complete.
+ *
+ * @subsection CNS_21 Programming Examples
+ *
+ * @subsubsection CNS_211 Obtaining the Personality
+ *
+ * The following example shows how to fetch a copy of the Blue Gene personality structure and also
+ * serves as a simple example of invoking a service:
+ *
+ * @code
+ *
+ * BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time
+ * _BGP_Personality_t* pers = (_BGP_Personality_t*)(*descr->services->getPersonalityData)();
+ * ...
+ * @endcode
+ *
+ * The programming model guarantees that the descriptor is static. Thus, one can provide a
+ * convenience method to make service invocation a little more readable
+ *
+ * @code
+ *
+ *
+ * static BGCNS_Descriptor* _cns_descriptor = ...; // obtained from CNS at boot time
+ *
+ * inline BGCNS_ServiceDirectory* cns() { return _cns_descriptor->services; }
+ *
+ * void foo() {
+ * _BGP_Personality_t* pers = (_BGP_Personality_t*)cns()->getPersonalityData();
+ * ...
+ * }
+ *
+ * @endcode
+ *
+ * This style will be used in all of the subsequent examples.
+ *
+ * @subsubsection CNS_212 SMP Initialization
+ *
+ * Common Node Services will launch the kernel on a single core (typically core 0) and will
+ * leave the remaining cores parked. The kernel can activate additional cores via the @c takeCPU
+ * service. Here is a very simple example of such kernel code:
+ *
+ * @code
+ *
+ * void anEntryPoint(unsigned core, void* arg_not_used) {
+ * // Do whatever your kernel needs to do here. Typically,
+ * // this function never returns. You will arrive here
+ * // when takeCPU is invoked (below).
+ * }
+ *
+ * void someCodeOnTheMainThread() {
+ *
+ * // ...
+ *
+ * unsigned N = cns()->getNumberOfCores();
+ *
+ * for (core = 1; core < N; core++) {
+ * if ( cns()->takeCPU(core, NULL, &anEntryPoint) != 0 ) {
+ * // error handling goes here
+ * }
+ * }
+ *
+ * // ...
+ * }
+ *
+ * @endcode
+ *
+ * @subsubsection CNS_213 Version Compatibility
+ *
+ * Common Node Services structures and APIs should remain compatible within maintenance
+ * releases and e-fixes. Kernels may add a runtime check to ensure that the version
+ * of CNS is compatible with the version from compile time. This is done as follows:
+ *
+ * @code
+ *
+ * BGCNS_Descriptor* descr = ...; // obtained from CNS at boot time
+ *
+ * if ( ! BGCNS_IS_COMPATIBLE(descr) ) {
+ * // incompatible CNS (panic?)
+ * }
+ *
+ * @endcode
+ *
+ * @subsubsection CNS_23 Interrupts
+ *
+ * A kernel wanting to use the CNS interrupt services would first have to enable interrupts
+ * for the appropriate Blue Gene BIC group and IRQ within that group. This would likely be
+ * done at boot time. So, for example, such a kernel could enable interrupts for the Universal
+ * Performance Counter (group 5, IRQ 2) to be handled by the non-critical handler of core 0 as
+ * follows:
+ *
+ * @code
+ * cns()->enableInterrupt(5, 2, BGCNS_NonCritical, 0);
+ * @endcode
+ *
+ * Such a kernel might also maintain a collection of routines that act as subhandlers of the
+ * non-critical interrupt handler. In this example, we'll assume it is simply a two
+ * dimensional array indexed by group and IRQ:
+ *
+ * @code
+ * subhandlers[5][2] = &theUpcSubHandler;
+ * @endcode
+ *
+ * That kernel's non-critical interrupt handler would then typically handle all interrupts by
+ * successively invoking the getInterrupt() service to determine the group and IRQ, and then
+ * dispatching the appropriate subhandler. Additionally, the interrupt will be acknowledged
+ * so to avoid continuous interruption:
+ *
+ * @code
+ *  unsigned g, i;
+ *
+ * while ( cns()->getInterrupt(&g, &i, BGCNS_NonCritical) == 0) {
+ * (*subhandlers[g][i])(); // dispatch the handler
+ * cns()->acknowledgeInterrupt(g,i); // ack the interrupt
+ * }
+ * @endcode
+ *
+ * @subsubsection CNS_24 Global Barriers and Interrupts
+ *
+ * The Blue Gene/P Global Interrupt Controller (aka GLINT) provides 4 independent channels
+ * that may be configured as either a global barrier or a global interrupt.
+ *
+ * Barriers are constructed by invoking the barrier service:
+ *
+ * @code
+ * unsigned channel = 0;
+ *
+ * // synchronize:
+ * int reset = 1;
+ * int rc;
+ * while ( (rc = cns()->globalBarrier_nonBlocking(channel, reset, 1000)) == BGCNS_RC_CONTINUE ) {
+ * reset = 0;
+ * }
+ *
+ * if ( rc == BGCNS_RC_COMPLETE ) {
+ * // good path
+ * }
+ * else {
+ * // error
+ * }
+ * @endcode
+ *
+ * Similarly, a barrier with a timeout can also be constructed:
+ *
+ * @code
+ * unsigned channel = 0;
+ * int reset = 1;
+ * unsigned long long startTime = ...; // obtain current time
+ * int rc;
+ *
+ * while ( (rc = cns()->globalBarrier_nonBlocking(channel,reset, 1000)) == BGCNS_RC_CONTINUE ) {
+ * reset = 0;
+ * unsigned long long currentTime = ...; // obtain current time
+ * if ( currentTime - startTime > timeout )
+ * break;
+ * }
+ *
+ * if ( rc == BGCNS_RC_COMPLETE ) {
+ * // good path
+ * }
+ * else {
+ * // timeout or error
+ * }
+ * @endcode
+ *
+ * A node may opt out of a barrier channel via the disableBarrier service:
+ *
+ * @code
+ *
+ * // some other synchronization mechanism needs to go here
+ *
+ * cns()->disableBarrier(channel);
+ *
+ * @endcode
+ *
+ * Conversely, it may opt back in:
+ *
+ * @code
+ * cns()->enableBarrier(channel, user_mode);
+ * @endcode
+ *
+ * By default, CNS reserves the use of channel 2 as a global interrupt for environmental
+ * monitoring. It also reserves channel 3 for use as a supervisory mode, compute-node
+ * only barrier. Compute node kernels are free to share this channel for the same
+ * purpose (compute node, supervisory barrier). The enable/disable barrier services
+ * may return errors if operating on a reserved channel.
+ *
+ * NOTE: The standard BG/P software stack, which includes I/O node Linux and Compute Node
+ * Kernel (CNK) uses channel 0 as an I/O node barrier during boot and transforms it to an
+ * compute-node only barrier when jobs execute.
+ *
+ *
+ * @section CNS_3 DMA Services
+ *
+ * The DMA services provided in CNS are low-level services. Interested readers of this area should
+ * also look at the documentation for the DMA SPIs, which are at a slightly higher level.
+ *
+ *
+ *
+ * @section CNS_4 Reserved and Preferred Addresses
+ *
+ *
+ * The following virtual memory regions are reserved and must be avoided by
+ * kernels:
+ *
+ * @code
+ *
+ * +------------+------------+------+----------------------+-----------------------+
+ * | Lower | Upper | Size | Usage | Attributes |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | CNSlow[1] | CNShigh[2] | 256K | CNS | I, Rs, Ws, Xs |
+ * +------------+------------+------+----------------------+-----------------------+
+ *
+ * [1] CNSlow = descr->baseVirtualAddress , usually 0xFFF40000
+ * [2] CNShigh = descr->baseVirtualAddress + descr->size - 1; usually 0xFFF7FFFF
+ *
+ * @endcode
+ *
+ * The following virtual memory regions are used by default in CNS. Kernels that wish to have
+ * a different memory map may do so via the mapDevice service.
+ *
+ * @code
+ * +------------+------------+------+----------------------+-----------------------+
+ * | Lower | Upper | Size | Usage | Attributes |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFB0000 | 0xFFFCFFFF | 64K | Torus | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFD0000 | 0xFFFD3FFF | 16K | DMA | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFD9000 | 0xFFFD9FFF | 4K | DevBus | I, G, Rs, Ws |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFDA000 | 0xFFFDAFFF | 4K | UPC | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFDC000 | 0xFFFDD3FF | 4K | Collective | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFDE000 | 0xFFFDEFFF | 4K | BIC | I, G, Rs, Ws, Xs |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFF0000 | 0xFFFF3FFF | 16K | Lockbox (supervisor) | I, G, Rs, Ws |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFF4000 | 0xFFFF7FFF | 16K | Lockbox (user) | I, G, Rs, Ws, Ru, Wu |
+ * +------------+------------+------+----------------------+-----------------------+
+ * | 0xFFFF8000 | 0xFFFFFFFF | 32K | SRAM | SWOA, WL1, Rs, Ws, Xs |
+ * +------------+------------+------+----------------------+-----------------------+
+ * @endcode
+ *
+ */
+
+
+#define BGCNS_VERSION 0x01030000 /* V1R3M0 efix 0 */
+#define BGCNS_IS_COMPATIBLE(descr) ( ((descr)->version & 0xFFFF0000) == (BGCNS_VERSION & 0xFFFF0000) ) //!< True iff the given descriptor is compatible with this version of CNS
+
+/* ! @enum BGCNS_InterruptType */
+/* ! @brief Defines the different types of interrupts known to */
+/* ! Common Node Services. */
+typedef enum {
+ BGCNS_NonCritical, //!< Non-critical interrupt
+ BGCNS_Critical, //!< Critical interrupt
+ BGCNS_MachineCheck, //!< Machine check
+} BGCNS_InterruptType;
+
+/* ! @enum BGCNS_FifoOperation */
+/* ! @brief Defines the types of FIFO operations */
+/* ! @see _BGCNS_ServiceDirectory::setDmaFifoControls */
+/* ! @see _BGCNS_ServiceDirectory::setDmaLocalCopies */
+/* ! @see _BGCNS_ServiceDirectory::setDmaPriority */
+typedef enum {
+ BGCNS_Disable = 0,
+ BGCNS_Enable = 1,
+ BGCNS_Reenable = 2
+} BGCNS_FifoOperation;
+
+/* ! @enum BGCNS_FifoFacility */
+/* ! @brief Defines the various types of FIFO facilities */
+typedef enum {
+ BGCNS_InjectionFifo, //!< Normal Injection FIFO
+ BGCNS_ReceptionFifo, //!< Normal Reception FIFO
+ BGCNS_ReceptionHeaderFifo, //!< Reception Header FIFO (typically used only for debugging)
+ BGCNS_InjectionFifoInterrupt,
+ BGCNS_ReceptionFifoInterrupt,
+ BGCNS_ReceptionHeaderFifoInterrupt,
+ BGCNS_InjectionCounterInterrupt,
+ BGCNS_ReceptionCounterInterrupt
+} BGCNS_FifoFacility;
+
+/* ! @enum BGCNS_LinkType */
+/* ! @brief Defines the types of MAC links. */
+/* ! @see _BGCNS_ServiceDirectory::macTestLink */
+typedef enum {
+ BGCNS_Transmitter, //!< A transmitter link.
+ BGCNS_Receiver //!< A receiver link.
+} BGCNS_LinkType;
+
+/* ! @enum BGCNS_EnvmonParameter */
+/* ! @brief Enumerates the various environmental monitor parameters. */
+/* ! @see _BGCNS_ServiceDirectory::getEnvmonParm */
+/* ! @see _BGCNS_ServiceDirectory::setEnvmonParm */
+typedef enum {
+ BGCNS_envmon_period = 0,
+ BGCNS_envmon_policy,
+ BGCNS_envmon_globintwire,
+
+ /* temporary */
+ BGCNS_envmon_duration,
+ BGCNS_envmon_ddrratio,
+ BGCNS_envmon_numparms
+} BGCNS_EnvmonParameter;
+
+
+#define BGCNS_RC_COMPLETE 0 //!< Indicates that the operation completed normally.
+#define BGCNS_RC_CONTINUE 1 //!< Indicates that the operation is still in progress.
+#define BGCNS_RC_TIMEOUT -1 //!< Indicates that the operation timed out.
+#define BGCNS_RC_ERROR -2 //!< Indicates that the operation failed.
+
+#define BGCNS_NUM_DMA_RECEPTION_GROUPS 4
+#define BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP 8
+
+/* ! @brief Describes the mapping of physical torus reception FIFOs to DMA reception FIFOs (rmFIFOs). */
+/* ! The first dimension indexes DMA reception groups, which are a combination of PID0 and PID1 bits */
+/* ! from the DMA packet. */
+/* ! */
+/* ! The second dimension indexes through the different dimensions: X+, X-, Y+, Y-, Z+, Z-, high priority */
+/* ! and local copy. */
+typedef unsigned char BGCNS_ReceptionMap[BGCNS_NUM_DMA_RECEPTION_GROUPS][BGCNS_NUM_DMA_RECEPTION_FIFOS_PER_GROUP];
+
+/* ! @brief Indicates that an interrupt is to be broadcast on all cores. */
+/* ! @see _BGCNS_ServiceDirectory::enableInterrupt */
+#define BGCNS_ALL_CORE_BROADCAST 0xFFFFFFFFu
+
+
+/* ! @enum BGCNS_DeviceMasks */
+/* ! @brief Provides a list of masks for various Blue Gene devices */
+
+typedef enum {
+ BGCNS_SRAM = 0x80000000u,
+ BGCNS_BIC = 0x40000000u,
+ BGCNS_Torus = 0x20000000u,
+ BGCNS_DevBus = 0x10000000u,
+ BGCNS_XEMAC = 0x08000000u,
+ BGCNS_LockBox = 0x04000000u,
+ BGCNS_Collective = 0x02000000u,
+ BGCNS_SRAM_Err = 0x01000000u,
+ BGCNS_DMA = 0x00800000u,
+ BGCNS_UPC = 0x00400000u
+} BGCNS_DeviceMasks;
+
+/* ! @typedef BGCNS_ServiceDirectory */
+/* ! @struct _BGCNS_ServiceDirectory */
+/* ! @brief The service directory is a collection of function pointers to services */
+/* ! provided by the Common Node Services. */
+typedef struct _BGCNS_ServiceDirectory {
+
+ /*------------------------------------------*/
+ /*--- Informational services for the node --*/
+ /*------------------------------------------*/
+
+
+ int (*isIONode)(void); //!< Returns 1 if this is an I/O node; 0 if not.
+
+
+ /*-----------------------------------------------------------------*/
+ /*--- Informational services for obtaining Raw personality data ---*/
+ /*-----------------------------------------------------------------*/
+
+ unsigned int (*getPersonalitySize)(void); //!< Returns the size (in bytes) of the Blue Gene personality.
+ void* (*getPersonalityData)(void); //!< Returns a pointer to the raw personality data.
+
+
+ /*-----------------------------------------------*/
+ /*--- Services for Symmetric Multi-Processing ---*/
+ /*-----------------------------------------------*/
+
+
+ unsigned (*getNumberOfCores)(void); //!< Returns the number of CPUs on this node.
+
+ /* ! @brief Called by the kernel to activate a CPU. */
+ /* ! @param[in] cpu The index of the cpu (core) to be activated. */
+ /* ! @param[in] entry The (kernel) entry point function. This function will be invoked when */
+ /* ! the CPU is actually activated. */
+ /* ! @param[in] arg A pointer to the lone argument to be passed to the entry point. */
+    /* ! @return  Zero (0) if the CPU was successfully activated.  Non-zero if the CPU was not */
+ /* ! activated (e.g. invalid cpu argument, or the cpu has already been */
+ /* ! activated). */
+ /* ! @remarks See Section x of the Common Node Services overview for details. */
+ int (*takeCPU)(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg));
+
+
+ /*--------------------------------------*/
+ /*--- Services for Blue Gene devices ---*/
+ /*--------------------------------------*/
+
+ /* ! @brief Checks active devices for a clean termination state and returns 0 */
+ /* ! if everything is nominal. Returns non-zero if any anomaly is */
+ /* ! detected and logs violations. */
+ /* ! @param[in] job_rc specifies the return code of the job that is terminating. */
+ int (*terminationCheck)(int job_rc);
+
+ /*-------------------------------*/
+ /*--- Services for interrupts ---*/
+ /*-------------------------------*/
+
+
+ /* ! @brief Enables the specified interrupt. For all interrupts except inter-processor */
+ /* ! interrupts, the interrupt will be handled by the specified core. */
+ /* ! @param[in] group Specifies the Blue Gene interrupt group */
+ /* ! @param[in] irq Specifies the interrupt index within the group */
+ /* ! @param[in] itype Specifies the type of interrupt that hardware will present */
+ /* ! for this group/irq. */
+ /* ! @param[in] core Specifies which core will handle the interrupt. If specified as */
+ /* ! BGCNS_ALL_CORE_BROADCAST, then all cores will handle the interrupt. */
+ /* ! @return Returns zero (0) if the interrupt is enabled and returns non-zero if it was not */
+ /* ! (including the case of bad arguments). */
+ int (*enableInterrupt)(unsigned group, unsigned irq, BGCNS_InterruptType itype, unsigned core);
+
+ /* ! @brief Disables the specified interrupt. */
+ /* ! @param[in] group Specifies the Blue Gene interrupt group */
+ /* ! @param[in] irq Specifies the interrupt index within the group */
+ /* ! @return Returns zero (0) if the interrupt is disabled and returns non-zero if it was not */
+ /* ! (including the case of bad arguments). */
+ int (*disableInterrupt)(unsigned group, unsigned irq);
+
+ /* ! @brief Queries the Blue Gene interrupt hardware for interrupts of the given */
+ /* ! type and returns the group/IRQ. This service is typically used in the */
+ /* ! context of an interrupt handler. Since multiple interrupt conditions */
+ /* ! may be present, the service is typically invoked from the handler */
+ /* ! (along with corresponding acknowledgement) until the return code */
+ /* ! indicates that no more interrupts are present. */
+ /* ! @param[out] group Specifies the Blue Gene interrupt group. The value is valid */
+ /* ! only when the return code is 0. */
+ /* ! @param[out] irq Specifies the interrupt index within the group. The value is */
+ /* ! valid only when the return code is zero. */
+ /* ! @param[in] itype Specifies the type of interrupt being queried. */
+ /* ! @return Returns zero (0) if an interrupt condition of the specified type exists. Returns -1 */
+ /* ! if no such condition exists. */
+ int (*getInterrupt)(BGCNS_InterruptType itype, unsigned* group, unsigned* irq);
+
+ /* ! @brief Acknowledges the specified interrupt, thus clearing the interrupt */
+ /* ! condition in the interrupt controller hardware. */
+ /* ! @param[in] group Specifies the Blue Gene interrupt group */
+ /* ! @param[in] irq Specifies the interrupt index within the group */
+ /* ! @return Returns zero (0) if the interrupt is acknowledged and returns non-zero if it was not */
+ /* ! (including the case of bad arguments). */
+ /* ! @remarks Note that for some interrupts, it is not sufficient to only acknowledge */
+ /* ! the interrupt; the hardware condition that triggered the interrupt may */
+ /* ! also need to be cleared. */
+ int (*acknowledgeInterrupt)(unsigned group, unsigned irq);
+
+ /* ! @brief Raises the specified interrupt. */
+ /* ! @param[in] group Specifies the Blue Gene interrupt group */
+ /* ! @param[in] irq Specifies the interrupt index within the group */
+ int (*raiseInterrupt)(unsigned group, unsigned irq);
+
+
+ /*------------------------*/
+ /*--- Mailbox services ---*/
+ /*------------------------*/
+
+ unsigned (*getMailboxMaximumConsoleInputSize)(void); //!< Returns the actual maximum console message input data size.
+ unsigned (*getMailboxMaximumConsoleOutputSize)(void); //!< Returns the actual maximum console message output data size.
+
+ /* ! @brief Writes a text message to the output mailbox. */
+ /* ! @param[in] msg a pointer to the message to be written. */
+ /* ! @param[in] msglen the length (in bytes) of the message to be written. */
+ /* ! @remarks As with all common services, the message data area must be mapped via */
+ /* ! the TLB when the service is called. The behavior is not defined if this */
+ /* ! is not the case. */
+ /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */
+ /* wrong (including a message that is too large). */
+ int (*writeToMailboxConsole)(char *msg, unsigned msglen);
+
+ /* ! @brief Writes a text message to the output mailbox but does not wait for a */
+ /* ! response back from the control system. When this service is used, */
+ /* ! the caller must poll for completion using the testForOutboxCompletion */
+ /* ! service. */
+ /* ! @param[in] msg a pointer to the message to be written. */
+ /* ! @param[in] msglen the length (in bytes) of the message to be written. */
+ /* ! @remarks As with all common services, the message data area must be mapped via */
+ /* ! the TLB when the service is called. The behavior is not defined if this */
+ /* ! is not the case. */
+ /* ! @return Zero (0) if the message was written successfully, non-zero if anything went */
+ /* wrong (including a message that is too large). */
+ int (*writeToMailboxConsole_nonBlocking)(char* msg, unsigned msglen);
+
+ /* ! @brief Tests the outbox to see if the last message was picked up by the control */
+ /* ! system. */
+ /* ! @return Zero (0) if the last message was picked up and returns non-zero if it has not. */
+ /* ! @remarks Typically the caller will invoke this service after having called */
+ /* ! writeToMailboxConsole_nonBlocking and will then invoke this service in a */
+ /* ! loop until zero is returned. */
+ int (*testForOutboxCompletion)(void);
+
+ /* ! @brief Reads a message from the input mail box. */
+ /* ! @param buf a pointer to a data area into which the message will be placed. */
+ /* ! @param bufsize gives the size of the data area, i.e. the largest message */
+ /* ! that may be safely received into the buffer. */
+ /* ! @return The actual length of the message (0 if no message was received). */
+ /* ! @remarks As with all common services, the message data area must be mapped */
+ /* ! via the TLB when this service is called. The results are not defined if */
+ /* ! this is not the case. */
+ unsigned (*readFromMailboxConsole)(char *buf, unsigned bufsize);
+
+ int (*testInboxAttention)(void); //!< Returns 1 if something is available in the input mailbox.
+
+ int (*_no_longer_in_use_1_)(void); //!< Obsolete ... do not use.
+
+ int (*writeToMailbox)(void* message, unsigned length, unsigned cmd);
+
+ /*------------------------------------*/
+ /*--- RAS and diagnostic services ---*/
+ /*------------------------------------*/
+
+ /* ! @brief TBD */
+ void (*machineCheck)(void *regs);
+
+ /* ! @brief Writes a RAS event to the log. */
+ /* ! @param[in] facility The facility (aka component). */
+ /* ! @param[in] unit The unit (aka subcomponent). */
+ /* ! @param[in] err_code The error code. */
+ /* ! @param[in] numDetails The number of additional details. */
+ /* ! @param[in] details The list of additional details. */
+ /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+ /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */
+ int (*writeRASEvent)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] );
+
+ /* ! @brief Writes a RAS string to the log. */
+ /* ! @param[in] facility The facility (aka component). */
+ /* ! @param[in] unit The unit (aka subcomponent). */
+ /* ! @param[in] err_code The error code. */
+ /* ! @param[in] str The message string being written (ASCII encoded, null-terminated). Note that the length of this string is */
+ /* ! limited to _BGP_RAS_ASCII_MAX_LEN characters. The implementation may choose to truncate the string if it exceeds this */
+ /* ! length. */
+ /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */
+ /* ! string was truncated). */
+ /* ! @see bgp/arch/include/common/bgp_ras.h for details on facility, unit and err_code. */
+ int (*writeRASString)( unsigned facility, unsigned unit, unsigned short err_code, char* str );
+
+
+ /*---------------------------------*/
+ /*--- Global Interrupt services ---*/
+ /*---------------------------------*/
+
+ /* ! @brief A global (compute node) barrier. This call will block until all other compute nodes */
+ /* ! in the partition also arrive at the barrier. */
+ int (*globalBarrier)(void);
+
+ /* ! @brief A global (compute node) barrier. This call will block until all other compute nodes */
+ /* ! in the partition also arrive at the barrier or until the timeout is reached. */
+ /* ! @param timeoutInMillis specifies the timeout duration. Units are milliseconds. */
+ /* ! @return BGCNS_RC_COMPLETE if the barrier completed. BGCNS_RC_TIMEOUT if the barrier timed */
+ /* ! out. BGCNS_RC_ERROR if some other error occurred. */
+ int (*globalBarrierWithTimeout)(unsigned timeoutInMillis);
+
+
+
+ /*-------------------------*/
+ /*--- Network services ---*/
+ /*-------------------------*/
+
+
+ void (*initializeNetworks)(void); //!< @todo Is this is going away??? Talk to Andy
+
+ void (*_no_longer_in_use_381)(void); //!< @warning Do not use
+
+ void (*_no_longer_in_use_384)(void);//!< @warning Do not use
+
+
+ /*--------------------------*/
+ /*--- DMA unit services ---*/
+ /*--------------------------*/
+
+#define BGCNS_DMA_CAPTURE_X_PLUS 0 //!< watch the X+ receiver
+#define BGCNS_DMA_CAPTURE_X_MINUS 1 //!< watch the X- receiver
+#define BGCNS_DMA_CAPTURE_Y_PLUS 2 //!< watch the Y+ receiver
+#define BGCNS_DMA_CAPTURE_Y_MINUS 3 //!< watch the Y- receiver
+#define BGCNS_DMA_CAPTURE_Z_PLUS 4 //!< watch the Z+ receiver
+#define BGCNS_DMA_CAPTURE_Z_MINUS 5 //!< watch the Z- receiver
+#define BGCNS_DMA_CAPTURE_DISABLE 7 //!< disable link capturing
+
+ /* ! @brief Sets the link capture facility of the DMA unit to watch the specified */
+ /* ! receiver (or disable). */
+ /* ! @param[in] link Specifies the link being monitored. Use the BGCNS_DMA_CAPTURE_* */
+ /* ! mnemonics defined above. */
+ /* ! @return Zero if the operation succeeded, non-zero if it did not (e.g. an invalid */
+ /* ! link was specified). */
+ int (*setDmaLinkCapture)(int link);
+
+ /* ! @brief Clears the link capture unit so that another packet can be captured. */
+ void (*clearDmaLinkCapture)(void);
+
+#define BGCNS_RC_DMA_NO_PACKET_CAPTURED 0
+#define BGCNS_RC_DMA_CAPTURE_UNIT_ERROR -1
+#define BGCNS_RC_DMA_DATA_CONFLICT -2 //!< if initial read indicates a bad packet is captured but subsequent read shows bad packet not captured
+#define BGCNS_RC_DMA_DATA_CONFLICT2 -3 //!< if bad packet is captured, but all the bytes are the same
+ /* ! @brief Reads the DMA link capture packets. */
+ int (*readDmaLinkCapturePackets)(unsigned char* good_packet, int* good_packet_size, unsigned char* bad_packet, int* bad_packet_size);
+
+
+#define BGCNS_DMA_ALL_GROUPS 0xFFFFFFFF
+
+ /* ! @brief Sets FIFO controls for the DMA unit. */
+ /* ! */
+ /* ! An operation on facility BGCNS_InjectionFifo enables or disables a subset of the 128 DMA injection FIFOs. */
+ /* ! The FIFOs are organized into four groups of 32. The mask argument is a bit mask (bit i controls the i-th imFIFO */
+ /* ! within that group, that is the (group*32)+i imFIFO). */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionFifo enables or disables a subset of the 32 DMA reception FIFOs. */
+ /* ! The group argument is ignored and the mask argument is a bit mask (bit i controls the i-th reception FIFO). */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionHeaderFifo enables or disables the header FIFO for the specified */
+ /* ! group. The mask argument is ignored. Note that the header FIFO is typically used for debugging. */
+ /* ! */
+ /* ! An operation on facility BGCNS_InjectionFifoInterrupt enables or disables threshold interrupts for the */
+ /* ! specified injection FIFO. Threshold interrupts occur if available space is less than the configured */
+ /* ! threshold when the FIFO is used for a remote get operation. The group and mask arguments are as */
+ /* ! described in the BGCNS_InjectionFifo operation (above). */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionFifoInterrupt enables or disables interrupts for the specified */
+ /* ! reception FIFO(s). If enabled, an interrupt will occur when the reception FIFO's available space drops */
+ /* ! below the configured threshold. The group argument selects the interrupt type (type 0, 1, 2 or 3). */
+ /* ! The mask argument is a bit mask selecting one or more of the 32 normal reception FIFOs. */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionHeaderFifoInterrupt enables or disables interrupts for the specified */
+ /* ! reception header FIFO. Reception header FIFOs are used for debug purposes only. */
+ /* ! */
+ /* ! An operation on facility BGCNS_InjectionCounterInterrupt enables or disables "Counter Hit Zero" interrupts. */
+ /* ! The group argument does not specify counter group, but rather specifies interrupt 0, 1, 2 or 3. The mask */
+ /* ! argument is a bit mask that selects one or more counter subgroups to operate on (the 256 injection counters */
+ /* ! are partitioned into 32 subgroups of 8 counters). */
+ /* ! */
+ /* ! An operation on facility BGCNS_ReceptionCounterInterrupt enables or disables "Counter Hit Zero" interrupts */
+ /* ! for reception counters. The group and mask arguments are the same as described in the */
+ /* ! BGCNS_InjectionCounterInterrupt operation (above). */
+ /* ! */
+ /* ! The buffer argument is used as a means to save/restore in an opaque manner. This is achieved by passing */
+ /* ! a non-NULL buffer to a disable operation and subsequently passing that buffer during a reenable */
+ /* ! operation (the buffer is used to snapshot state). */
+ /* ! */
+ /* ! */
+ /* ! @code */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | Facility | group | mask | Notes | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_InjectionFifo | 0..3 | 32 bits | [1] | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_ReceptionFifo | n/a | 32 bits | [2] | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_ReceptionHeaderFifo | 0..3, ALL | N/A | | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_InjectionFifoInterrupt | 0..3 | 32 bits | [1] | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_ReceptionFifoInterrupt | 0..3 | 32 bits | [3] | */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_InjectionCounterInterrupt | 0..3 | 32 bits | [3][4]| */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! | BGCNS_ReceptionCounterInterrupt | 0..3 | 32 bits | [3][4]| */
+ /* ! +---------------------------------+-----------+---------+-------+ */
+ /* ! */
+ /* ! [1] There are 128 injection FIFOs partitioned into 4 groups of 32. */
+ /* ! [2] There are 32 normal reception FIFOs in BG/P. */
+ /* ! [3] There are 4 interrupt lines. The group argument selects one of these 4. */
+ /* ! [4] There are 256 counters of each type (injection and reception). The */
+ /* ! 32-bit mask partitions them into groups of 8. */
+ /* ! */
+ /* ! @endcode */
+ /* ! */
+ /* ! @param[in] operation defines the type of operation being performed (enable, disable, or re-enable). */
+ /* ! @param[in] facility defines the type of FIFO being configured. */
+ /* ! @param[in] group is interpreted differently based on the facility. */
+ /* ! @param[in] mask is interpreted differently based on the facility. */
+ /* ! @param[out] buffer is interpreted differently based on the operation and facility. It is generally used to capture */
+ /* ! a copy of the facility's current state in an enable operation (and may be null, in which case it is ignored). It is */
+ /* ! generally used as the value to be loaded in a re-enable operation. In this manner, a state value captured by an enable */
+ /* ! operation may be easily restored by a subsequent re-enable operation. The buffer argument is generally ignored by */
+ /* ! disable operations. */
+ int (*setDmaFifoControls)(BGCNS_FifoOperation operation, BGCNS_FifoFacility facility, unsigned group, unsigned mask, unsigned* buffer);
+
+ /* ! @brief Maps injection FIFOs onto physical (torus hardware) FIFOs. */
+ /* ! @param[in] group specifies the injection FIFO group. */
+ /* ! @param[in] fifoIds is an array of length numberOfFifos whose elements are the identifiers of the imFIFO (within that */
+ /* ! given group). */
+ /* ! @param[in] injection_map is an array of length numberOfFifos whose elements are 8-bit masks identifying which of the */
+ /* ! physical torus injection FIFOs are mapped. Bits 0-3 correspond to torus group 0, and bits 4-7 correspond to torus */
+ /* ! group 1. Bits 3 and 7 are the high priority FIFOs. */
+ /* ! @param[in] numberOfFifos describes the number of elements contained in the fifoIds and injection_map arguments. */
+ /* ! @return Zero if the map was properly set. Non-zero if it was not, including the case of illegal arguments. */
+ /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */
+ /* ! argument is 0..3 and the legal range for the fifoIds[] elements is 0..31. */
+
+ int (*setDmaInjectionMap)(unsigned group, unsigned fifoIds[], unsigned char injection_map[], unsigned numberOfFifos);
+
+ /* ! @brief Enables or disables "local copy" behavior for the specified injection FIFOs. A local copy injection FIFO */
+ /* ! can be used to perform memory copies within a node via the DMA engine. */
+ /* ! @param[in] operation specifies whether local copies is being enabled or disabled on the specified FIFOs. The BGCNS_Reenable */
+ /* ! operation is not supported. */
+ /* ! @param[in] group specifies the injection FIFO group. */
+ /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */
+ /* ! @return Zero if the operation succeeded; non-zero if it did not. */
+ /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */
+ /* ! argument is 0..3. */
+ int (*setDmaLocalCopies)(BGCNS_FifoOperation operation, unsigned group, unsigned bits);
+
+ /* ! @brief Enables or disables the priority bit for the specified injection FIFOs. The priority bit */
+ /* ! is used by the hardware arbitration (details are not further documented here). */
+ /* ! @param[in] operation specifies whether priority bits are being set or cleared. */
+ /* ! @param[in] group specifies the injection FIFO group. */
+ /* ! @param[in] bits selects one or more injection FIFOs from within the group on which to operate. */
+ /* ! @note In BG/P, there are 128 injection FIFOs partitioned into 4 groups of 32. So the legal range of the group */
+ /* ! argument is 0..3. */
+ int (*setDmaPriority)(BGCNS_FifoOperation operation, unsigned group, unsigned bits);
+
+ /* ! @brief Sets the mapping from physical (torus hardware) reception FIFOs to reception FIFOs. The hardware supports */
+ /* ! 8 torus FIFOs (six torus dimensions plus high priority plus local copy). Furthermore, the hardware supports */
+ /* ! 4 groups as derived from the PID0 and PID1 bits of the DMA packet. Thus the mapping is a 4 x 8 matrix of */
+ /* ! reception FIFO ids. */
+ /* ! @param[in] torus_reception_map maps {group} X {torus-hardware-FIFOs} --> reception FIFOs. */
+ /* ! @param[in] fifo_types is an array of N values specifying the type of each normal reception FIFO (see also threshold). For BGP, */
+ /* ! N=2 (there are 32 normal reception FIFOs). */
+ /* ! @param[in] header_types is an array of N values specifying the type of each reception header FIFO (see also threshold). For */
+ /* ! BGP, N=4 (there are 4 reception header FIFOs). Note that reception header FIFOs are typically only used for debugging purposes. */
+ /* ! @param[in] threshold is an array of N threshold values. The value threshold[i] specifies the threshold value for reception */
+ /* ! FIFO type i. If reception FIFO interrupts are enabled (see setDmaFifoControls) and a reception FIFO's available space drops */
+ /* ! below its threshold, an interrupt is driven. For BGP, N=2 (there are type 0 and type 1 injection FIFOs). */
+ int (*setDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned header_types[], unsigned threshold[]);
+
+ /* ! @brief Gets the reception map. */
+ /* ! @see setDmaReceptionMap for descriptions of the map and arguments. */
+ int (*getDmaReceptionMap)( BGCNS_ReceptionMap torus_reception_map, unsigned fifo_types[], unsigned short* store_headers, unsigned header_types[], unsigned threshold[]);
+
+
+ /* ! @deprecated */
+ int (*_used_to_be_clearDmaFullReceptionFifo__removed)(void);
+
+
+ /* ! @brief Resets the MAC unit's PHY. */
+ /* ! @return Zero if the unit was properly reset. Returns non-zero if some error occurred. */
+ /* ! @deprecated See macResetPHY_nonBlocking. */
+ int (*macResetPHY)(void);
+
+ /* ! @brief Tests the MAC unit's link. */
+ /* ! @param[in] link_type specifies the type of link to be tested. */
+ /* ! @return One (1) if the link is active; zero (0) if it is not. */
+ /* ! @deprecated See macTestLink_nonBlocking */
+ int (*macTestLink)(BGCNS_LinkType link_type);
+
+ /* ! @brief Reads one of the MAC's XGMII registers. */
+ /* ! @param[in] device_address */
+ /* ! @param[in] port_address */
+ /* ! @param[in] register_address */
+ /* ! @return The register's value or a negative number if some error occurred. */
+ /* ! @deprecated Low level MAC register access is being eliminated. */
+ int (*macXgmiiRead)(unsigned device_address, unsigned port_address, unsigned register_address);
+
+ /* ! @brief Writes one of the MAC's XGMII registers. */
+ /* ! @param[in] device_address */
+ /* ! @param[in] port_address */
+ /* ! @param[in] register_address */
+ /* ! @param[in] value */
+ /* ! @return Zero (0) if the register was successfully written; non-zero if some error occurred. */
+ /* ! @deprecated Low level MAC register access is being eliminated. */
+ int (*macXgmiiWrite)(unsigned device_address, unsigned port_address, unsigned register_address, unsigned value);
+
+
+ /* ! @brief Trains SerDes in a non-blocking manner. The standard usage is to initiate */
+ /* ! training with trainSerDes(1), check the return code, and then continue to invoke */
+ /* ! trainSerDes(0) as long as the return code is BGCNS_RC_CONTINUE. */
+ /* ! @param[in] reset Should be 1 when initiating a retraining sequence and 0 for any */
+ /* ! continuations. */
+ /* ! @return BGCNS_RC_CONTINUE if training is still ongoing (the caller should re-invoke */
+ /* ! the service again (with reset=0). BGCNS_RC_COMPLETE if training is complete. */
+ /* ! BGCNS_ERROR if some error has occurred. */
+ int (*trainSerDes)(int reset);
+
+ /* ! @brief Fetches the value of the specified control parameter of the environmental monitor. */
+ /* ! @param[in] parameter Parameter to retrieve. Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */
+ /* ! @param[in] value Pointer to the storage location that will contain the parameter's value when the function successfully returns. */
+ /* ! @return Zero if the register was successfully fetched; non-zero if some error occurred. */
+ int (*getEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int* value);
+
+ /* ! @brief Stores a value to the specified control parameter of the environmental monitor */
+ /* ! @param[in] parameter Parameter to store. Should be a valid parameter in the BGCNS_EnvmonParameter enumeration */
+ /* ! @param[in] value New value for the parameter */
+ /* ! @return Zero if the register was successfully fetched; non-zero if some error occurred. */
+ int (*setEnvmonParm)(BGCNS_EnvmonParameter parameter, unsigned int value);
+
+ /* ! @brief Performs checks and ensures that the node will continue to operate within tolerances. */
+ /* ! @note MUST be called regularly as indicated by nextCallbackTime parameter */
+ /* ! @param[in] nextCallbackTime Upon returning, this will contain the PPC Timebase register value indicating when the next */
+ /* ! time the operating system needs to call performEnvMgmt. Failure to do so may result in poorly performing */
+ /* ! nodes or shutdown of the block / rack. */
+ int (*performEnvMgmt)(unsigned long long* nextCallbackTime);
+
+
+ /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */
+ /* ! response back from the control system. When this service is used, */
+ /* ! the caller must poll for completion using the testForOutboxCompletion */
+ /* ! service. */
+ /* ! @param[in] facility The facility (aka component). See bgp_ras.h for a list of facilities. */
+ /* ! @param[in] unit The unit (aka subcomponent). See bgp_ras.h for a list of units. */
+ /* ! @param[in] err_code The error code. See bgp_ras.h for a list of error codes. */
+ /* ! @param[in] numDetails The number of additional details. */
+ /* ! @param[in] details The list of additional details. */
+ /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+ int (*writeRASEvent_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, unsigned numDetails, unsigned details[] );
+
+ /* ! @brief Writes a RAS message to the output mailbox but does not wait for a */
+ /* ! response back from the control system. When this service is used, */
+ /* ! the caller must poll for completion using the testForOutboxCompletion */
+ /* ! service. */
+ /* ! @param[in] facility The facility (aka component). See bgp_ras.h for a list of facilities. */
+ /* ! @param[in] unit The unit (aka subcomponent). See bgp_ras.h for a list of units. */
+ /* ! @param[in] err_code The error code. See bgp_ras.h for a list of error codes. */
+ /* ! @param[in] str The message string being written (ASCII encoded, null-terminated). Note that the length of this string is */
+ /* ! limited to _BGP_RAS_ASCII_MAX_LEN characters. The implementation may choose to truncate the string if it exceeds this */
+ /* ! length. */
+ /* ! @return Zero if the entire message was written; non-zero if some error condition occurred (including the case where the */
+ /* ! string was truncated). */
+ /* ! @return Zero if the message was written, non-zero if some error condition occurred. */
+ int (*writeRASString_nonBlocking)( unsigned facility, unsigned unit, unsigned short err_code, char* str );
+
+ /* ! @brief Sets the core's timebase registers to the specified value. */
+ /* ! @param[in] newtime The new 64-bit timebase */
+ /* ! @return Zero if the timebase was successfully set, non-zero if some error condition occurred. */
+ /* ! @deprecated */
+ int (*synchronizeTimebase)(unsigned long long newtime);
+
+ /* ! @brief Sets the node's DMA physical protection settings. */
+ /* ! @note on BGP, there are a maximum of 8 read ranges and 8 write ranges */
+ /* ! @return Zero if the DMA ranges were set, non-zero if some error condition occurred. */
+ int (*dmaSetRange)(unsigned numreadranges, unsigned long long* read_lower_paddr, unsigned long long* read_upper_paddr,
+ unsigned numwriteranges, unsigned long long* write_lower_paddr, unsigned long long* write_upper_paddr);
+
+ /* ! @brief Checks the status of the devices and reports correctible RAS (if any) */
+ /* ! @param[in] clear_error_counts If non-zero, function will also reset the hardware error counters after posting any RAS. */
+ /* ! @return Zero if successful, non-zero if some error condition occurred. */
+ int (*statusCheck)(unsigned clear_error_counts);
+
+ /* ! @brief Stops the DMA and clears any reception unit failure */
+ int (*stopDma)(void);
+
+ /* ! @brief Starts the DMA */
+ int (*startDma)(void);
+
+ /* ! @brief Performs a hard exit. The status code is provided to the control system. */
+ /* ! @return This service never returns. */
+ void (*exit)(int rc);
+
+ /* ! @brief Resets the MAC unit's PHY but does not block. */
+ /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+ /* ! reset sequence. That is, callers should initiate a reset sequence with reset=1 and then */
+ /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+ /* ! reset=0. */
+ /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+ /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+ /* ! to indicate that it needs additional time. */
+ /* ! @return BGCNS_RC_COMPLETE if the unit was properly reset. BGCNS_RC_CONTINUE if the reset operation is */
+ /* ! not yet complete. BGCNS_RC_ERROR if the reset operation failed. */
+ int (*macResetPHY_nonBlocking)(int reset, unsigned timeoutInMillis);
+
+ /* ! @brief Tests the MAC unit's link but does not block. */
+ /* ! @param[in] link_type specifies the type of link to be tested. */
+ /* ! @param[out] result points to the link status, which is valid only when the return code is */
+ /* ! BGCNS_RC_COMPLETE. A value of one (1) indicates that the link is active; zero (0) */
+ /* ! indicates that it is inactive. */
+ /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+ /* ! test link sequence. That is, callers should initiate a sequence with reset=1 and then */
+ /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+ /* ! reset=0. */
+ /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+ /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+ /* ! to indicate that it needs additional time. */
+ /* ! @return BGCNS_RC_COMPLETE if the test is complete (result is valid only in this case). BGCNS_RC_CONTINUE */
+ /* ! if the reset operation is not yet complete. BGCNS_RC_ERROR if the reset operation failed. */
+ int (*macTestLink_nonBlocking)(BGCNS_LinkType link_type, unsigned* result, int reset, unsigned timeoutInMillis);
+
+ void * _not_in_use_1068;
+ void * _not_in_use_1069;
+
+
+ /* ! @brief Indicates that a new job is about to start. */
+ /* ! @return Zero (0) if CNS is ready for a new job to start. Returns non-zero otherwise. */
+ int (*startNextJob)(void);
+
+ /* ! @brief Indicates that the CNS should use the specified virtual address when accessing the */
+ /* ! given device. When a device is remapped, CNS will no longer make any attempt to map */
+ /* ! a TLB to access that device -- it is the responsibility of the kernel to handle the */
+ /* ! TLB either proactively or reactively (via a fault). */
+ /* ! @param[in] device specifies the device being mapped. */
+ /* ! @param[in] base_address is the root virtual address of the device. The address should be */
+ /* ! naturally aligned (relative to the size of the device). See the section Reserved and */
+ /* ! Preferred Addresses for more information. */
+ /* ! @return Zero (0) if the device was successfully remapped. Returns non-zero if it was not. */
+ /* ! @remarks The lock box is in active use by CNS during early boot and thus it is not */
+ /* ! possible to remap the BGCNS_LockBox device until all cores are activated by the kernel */
+ /* ! (that is, takeCPU has been called for all cores). */
+ int (*mapDevice)(BGCNS_DeviceMasks device, void* base_address);
+
+ /* ! @brief Enables barriers on the specified channel. */
+ /* ! @param channel specifies the channel being enabled. */
+ /* ! @param user_mode indicates whether the barrier is to be used in user-mode code. */
+ /* ! @return Zero if global barriers were enabled. Returns non-zero if the request could not be */
+ /* ! completed, including the case of attempting to enable a reserved channel. */
+ int (*enableBarrier)(unsigned int channel, int user_mode);
+
+ /* ! @brief Disables barriers on the specified channel. */
+ /* ! @return Zero if global barriers were disabled. Returns non-zero if the request could not be */
+ /* ! completed, including the case of attempting to disable a reserved channel. */
+ int (*disableBarrier)(unsigned int channel);
+
+ /* ! @brief A global barrier that does not block indefinitely. */
+ /* ! @param channel indicates the GLINT hardware channel to use. */
+ /* ! @param reset indicates whether this is the beginning (1) or a continuation (0) of a barrier */
+ /* ! sequence. That is, caller should initiate a barrier operation by passing reset=1 and then, */
+ /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke the service again with */
+ /* ! reset=0. */
+ /* ! @param timeoutInMillis is the (approximate) number of milliseconds that this service is allowed */
+ /* ! to wait for barrier participants before returning to the caller. */
+ /* ! @return BGCNS_RC_COMPLETE indicates that all participants have arrived at the barrier. BGCNS_RC_CONTINUE */
+ /* ! indicates that not all participants arrived within the allotted timeout period. BGCNS_RC_ERROR */
+ /* ! indicates that some other problem has been detected. */
+ /* ! @remarks This service is not thread safe. It is considered a programming error to invoke it */
+ /* ! from multiple threads concurrently and the behavior is not defined. */
+ int (*globalBarrier_nonBlocking)(unsigned channel, int reset, unsigned timeoutInMillis);
+
+ /* ! @brief Restart kernel in cycle reproducibility mode. */
+ /* ! @return Zero if no restart was required for reproducibility. */
+ /* ! @remarks This service must be called from each core and only after all I/O operations have been completed. */
+ /* ! Processors will be reset and kernels will start again. */
+ int (*setupReproducibility)(void);
+
+} BGCNS_ServiceDirectory;
+
+/* ! @deprecated */
+/* ! @typedef BGCNS_DeprecatedServicesDirectory */
+/* ! @struct _BGCNS_DeprecatedServices */
+/* ! @brief These services exist for historical reasons and are not further documented here. */
+/* ! They may not be available in future releases of CNS. */
+typedef struct _BGCNS_DeprecatedServices {
+ int (*torusTermCheck)(int* nonFatalRc);
+ int (*torusLinkErrCheck)(int* nonFatalRc);
+ int (*torusCRCExchange)(void);
+ int (*collectiveConfigureClassInternal)(unsigned virtualTree, unsigned short specifier);
+ int (*collectiveConfigureClass)(unsigned virtualTree, unsigned short specifier);
+ unsigned (*collectiveGetClass)(unsigned virtualTree);
+ int (*collectiveInit)(void);
+ int (*collectiveRelease)(void);
+ int (*collectiveHardReset)(void);
+ int (*netbusTermCheck)(void);
+ unsigned (*getSerDesLinkStatus)(void);
+ int (*dmaTermCheck)(void);
+} BGCNS_DeprecatedServicesDirectory;
+
+/* ! @typedef BGCNS_Descriptor */
+/* ! @struct _BGCNS_Descriptor */
+/* ! @brief The Common Node Services descriptor. This descriptor provides information to the kernel regarding */
+/* ! the CNS memory region as well as a service directory. The descriptor is passed to the kernel */
+/* ! upon boot and must not be altered by the kernel. */
+typedef struct _BGCNS_Descriptor {
+ BGCNS_ServiceDirectory* services; //!< A pointer to the services directory.
+ unsigned baseVirtualAddress; //!< The virtual address of the beginning of the CNS memory region.
+ unsigned size; //!< The size (in bytes) of the CNS memory region.
+ unsigned basePhysicalAddress; //!< The physical address of the CNS memory region.
+ unsigned basePhysicalAddressERPN; //!< The extended real page number of the CNS memory region.
+ unsigned bgcns_private_in_use; //!< Undefined. This field is for internal use only and may disappear at any time.
+ BGCNS_DeprecatedServicesDirectory* deprecatedServices; //!< @deprecated undocumented
+ unsigned version; //!< The CNS version
+} BGCNS_Descriptor;
+
+
+
+#endif /* !__ASSEMBLY */
+#endif /* _BGCNS_H */
diff --git a/arch/powerpc/include/asm/bgp_personality.h b/arch/powerpc/include/asm/bgp_personality.h
new file mode 100644
index 00000000000000..f4d9309640a4bf
--- /dev/null
+++ b/arch/powerpc/include/asm/bgp_personality.h
@@ -0,0 +1,1086 @@
+/*
+ * Andrew Tauferner
+ *
+ * Copyright 2006, 2007 International Business Machines
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef BGP_PERSONALITY_H_ // Prevent multiple inclusion
+#define BGP_PERSONALITY_H_
+
+
+
+
+/* #include <linux/types.h> */
+
+// These defines allow use of IBM's bit numberings (MSb=0, LSb=31) for multi-bit fields
+// b = IBM bit number of the least significant bit (highest number)
+// x = value to set in field
+// s = size
+#define _BS(b,x,s)( ( ( x) & ( 0x7FFFFFFF>> ( 31- ( s)))) << ( 31- ( b)))
+#define _BG(b,x,s)( ( _BS(b,0x7FFFFFFF,s) & x ) >> (31-b) )
+#define _BS64(b,x,s)( ( ( x) & ( 0x7FFFFFFFFFFFFFFFLL>> ( 63- ( s)))) << ( 63- ( b)))
+#define _BG64(b,x,s)( ( _BS64(b, 0x7FFFFFFFFFFFFFFFLL,s) & x ) >> (63-b) )
+#define _BN(b) ((1<<(31-(b))))
+#define _B1(b,x) (((x)&0x1)<<(31-(b)))
+#define _B2(b,x) (((x)&0x3)<<(31-(b)))
+#define _B3(b,x) (((x)&0x7)<<(31-(b)))
+#define _B4(b,x) (((x)&0xF)<<(31-(b)))
+#define _B5(b,x) (((x)&0x1F)<<(31-(b)))
+#define _B6(b,x) (((x)&0x3F)<<(31-(b)))
+#define _B7(b,x) (((x)&0x7F)<<(31-(b)))
+#define _B8(b,x) (((x)&0xFF)<<(31-(b)))
+#define _B9(b,x) (((x)&0x1FF)<<(31-(b)))
+#define _B10(b,x) (((x)&0x3FF)<<(31-(b)))
+#define _B11(b,x) (((x)&0x7FF)<<(31-(b)))
+#define _B12(b,x) (((x)&0xFFF)<<(31-(b)))
+#define _B13(b,x) (((x)&0x1FFF)<<(31-(b)))
+#define _B14(b,x) (((x)&0x3FFF)<<(31-(b)))
+#define _B15(b,x) (((x)&0x7FFF)<<(31-(b)))
+#define _B16(b,x) (((x)&0xFFFF)<<(31-(b)))
+#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b)))
+#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b)))
+#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b)))
+#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b)))
+#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b)))
+#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b)))
+#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b)))
+#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b)))
+#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b)))
+#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b)))
+#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b)))
+#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b)))
+#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b)))
+#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b)))
+#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b)))
+
+#define BGP_UCI_Component_Rack ( 0)
+#define BGP_UCI_Component_Midplane ( 1)
+#define BGP_UCI_Component_BulkPowerSupply ( 2)
+#define BGP_UCI_Component_PowerCable ( 3)
+#define BGP_UCI_Component_PowerModule ( 4)
+#define BGP_UCI_Component_ClockCard ( 5)
+#define BGP_UCI_Component_FanAssembly ( 6)
+#define BGP_UCI_Component_Fan ( 7)
+#define BGP_UCI_Component_ServiceCard ( 8)
+#define BGP_UCI_Component_LinkCard ( 9)
+#define BGP_UCI_Component_LinkChip (10)
+#define BGP_UCI_Component_LinkPort (11) // Identifies 1 end of a LinkCable
+#define BGP_UCI_Component_NodeCard (12)
+#define BGP_UCI_Component_ComputeCard (13)
+#define BGP_UCI_Component_IOCard (14)
+#define BGP_UCI_Component_DDRChip (15)
+#define BGP_UCI_Component_ENetConnector (16)
+
+typedef struct BGP_UCI_Rack_t
+ { // "Rxy": R<RackRow><RackColumn>
+ unsigned Component : 5; // when BGP_UCI_Component_Rack
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned _zero : 19; // zero's
+ }
+ BGP_UCI_Rack_t;
+
+#define BGP_UCI_RACK_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_Rack
+#define BGP_UCI_RACK_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_RACK_RACKCOLUMN(x) _B4(12,x) // 0..F
+
+
+
+typedef struct BGP_UCI_Midplane_t
+ { // "Rxy-Mm": R<RackRow><RackColumn>-M<Midplane>
+ unsigned Component : 5; // when BGP_UCI_Component_Midplane
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned _zero : 18; // zero's
+ }
+ BGP_UCI_Midplane_t;
+
+#define BGP_UCI_MIDPLANE_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_Midplane
+#define BGP_UCI_MIDPLANE_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_MIDPLANE_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_MIDPLANE_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+
+
+typedef struct BGP_UCI_BulkPowerSupply_t
+ { // "Rxy-B": R<RackRow><RackColumn>-B
+ unsigned Component : 5; // when BGP_UCI_Component_BulkPowerSupply
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned _zero : 19; // zero's
+ }
+ BGP_UCI_BulkPowerSupply_t;
+
+#define BGP_UCI_BULKPOWERSUPPLY_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_BulkPowerSupply
+#define BGP_UCI_BULKPOWERSUPPLY_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_BULKPOWERSUPPLY_RACKCOLUMN(x) _B4(12,x) // 0..F
+
+
+
+typedef struct BGP_UCI_PowerCable_t
+ { // "Rxy-B-C": R<RackRow><RackColumn>-B-C
+ unsigned Component : 5; // when BGP_UCI_Component_PowerCable
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned _zero : 19; // zero's
+ }
+ BGP_UCI_PowerCable_t;
+
+#define BGP_UCI_POWERCABLE_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_PowerCable
+#define BGP_UCI_POWERCABLE_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_POWERCABLE_RACKCOLUMN(x) _B4(12,x) // 0..F
+
+
+
+typedef struct BGP_UCI_PowerModule_t
+ { // "Rxy-B-Pp": R<RackRow><RackColumn>-B-P<PowerModule>
+ unsigned Component : 5; // when BGP_UCI_Component_PowerModule
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned PowerModule : 3; // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
+ unsigned _zero : 16; // zero's
+ }
+ BGP_UCI_PowerModule_t;
+
+#define BGP_UCI_POWERMODULE_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_PowerModule
+#define BGP_UCI_POWERMODULE_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_POWERMODULE_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_POWERMODULE_POWERMODULE(x) _B3(15,x) // 0..7 (0..3 left to right facing front, 4-7 left to right facing rear)
+
+
+typedef struct BGP_UCI_ClockCard_t
+ { // "Rxy-K": R<RackRow><RackColumn>-K
+ unsigned Component : 5; // when BGP_UCI_Component_ClockCard
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned _zero : 19; // zero's
+ }
+ BGP_UCI_ClockCard_t;
+
+#define BGP_UCI_CLOCKCARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_PowerModule
+#define BGP_UCI_CLOCKCARD_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_CLOCKCARD_RACKCOLUMN(x) _B4(12,x) // 0..F
+
+
+
+typedef struct BGP_UCI_FanAssembly_t
+ { // "Rxy-Mm-Aa": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>
+ unsigned Component : 5; // when BGP_UCI_Component_FanAssembly
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned FanAssembly : 4; // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+ unsigned _zero : 14; // zero's
+ }
+ BGP_UCI_FanAssembly_t;
+
+#define BGP_UCI_FANASSEMBLY_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_FanAssembly
+#define BGP_UCI_FANASSEMBLY_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_FANASSEMBLY_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_FANASSEMBLY_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_FANASSEMBLY_FANASSEMBLY(x) _B4(17,x) // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+
+
+
+typedef struct BGP_UCI_Fan_t
+ { // "Rxy-Mm-Aa-Ff": R<RackRow><RackColumn>-M<Midplane>-A<FanAssembly>-F<Fan>
+ unsigned Component : 5; // when BGP_UCI_Component_Fan
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned FanAssembly : 4; // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+ unsigned Fan : 2; // 0..2 (0=Tailstock, 2=Midplane)
+ unsigned _zero : 12; // zero's
+ }
+ BGP_UCI_Fan_t;
+
+#define BGP_UCI_FAN_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_Fan
+#define BGP_UCI_FAN_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_FAN_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_FAN_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_FAN_FANASSEMBLY(x) _B4(17,x) // 0..9 (0=Bot Front, 4=Top Front, 5=Bot Rear, 9=Top Rear)
+#define BGP_UCI_FAN_FAN(x) _B2(19,x) // 0..2 (0=Tailstock, 2=Midplane)
+
+typedef struct BGP_UCI_ServiceCard_t
+ { // "Rxy-Mm-S": R<RackRow><RackColumn>-M<Midplane>-S
+ unsigned Component : 5; // when BGP_UCI_Component_ServiceCard
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top (Master ServiceCard in M0)
+ unsigned _zero : 18; // zero's
+ }
+ BGP_UCI_ServiceCard_t;
+
+#define BGP_UCI_SERVICECARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_ServiceCard
+#define BGP_UCI_SERVICECARD_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_SERVICECARD_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_SERVICECARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top (Master ServiceCard in M0)
+
+
+
+typedef struct BGP_UCI_LinkCard_t
+ { // "Rxy-Mm-Ll": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>
+ unsigned Component : 5; // when BGP_UCI_Component_LinkCard
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned LinkCard : 2; // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
+ unsigned _zero : 16; // zero's
+ }
+ BGP_UCI_LinkCard_t;
+
+#define BGP_UCI_LINKCARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_LinkCard
+#define BGP_UCI_LINKCARD_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_LINKCARD_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_LINKCARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_LINKCARD_LINKCARD(x) _B2(15,x) // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
+
+
+
+typedef struct BGP_UCI_LinkChip_t
+ { // "Rxy-Mm-Ll-Uu": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-U<LinkChip>
+ unsigned Component : 5; // when BGP_UCI_Component_LinkChip
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned LinkCard : 2; // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
+ unsigned LinkChip : 3; // 00..05: left to right from Front
+ unsigned _zero : 13; // zero's
+ }
+ BGP_UCI_LinkChip_t;
+
+#define BGP_UCI_LINKCHIP_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_LinkChip
+#define BGP_UCI_LINKCHIP_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_LINKCHIP_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_LINKCHIP_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_LINKCHIP_LINKCARD(x) _B2(15,x) // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
+#define BGP_UCI_LINKCHIP_LINKCHIP(x) _B3(18,x) // 00..05: left to right from Front
+
+typedef struct BGP_UCI_LinkPort_t
+ { // "Rxy-Mm-Ll-Jjj": R<RackRow><RackColumn>-M<Midplane>-L<LinkCard>-J<LinkPort>
+ unsigned Component : 5; // when BGP_UCI_Component_LinkPort
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned LinkCard : 2; // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
+ unsigned LinkPort : 4; // 00..15: left to right from Front
+ unsigned _zero : 12; // zero's
+ }
+ BGP_UCI_LinkPort_t;
+
+#define BGP_UCI_LINKPORT_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_LinkPort
+#define BGP_UCI_LINKPORT_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_LINKPORT_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_LINKPORT_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_LINKPORT_LINKCARD(x) _B2(15,x) // 0..3: 0=BF, 1=TF, 2=BR, 3=TR)
+#define BGP_UCI_LINKPORT_LINKPORT(x) _B4(19,x) // 00..15: left to right from Front
+
+
+typedef struct BGP_UCI_NodeCard_t
+ { // "Rxy-Mm-Nnn": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>
+ unsigned Component : 5; // when BGP_UCI_Component_NodeCard
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+ unsigned _zero : 14; // zero's
+ }
+ BGP_UCI_NodeCard_t;
+
+#define BGP_UCI_NODECARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_NodeCard
+#define BGP_UCI_NODECARD_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_NODECARD_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_NODECARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_NODECARD_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+
+
+
+typedef struct BGP_UCI_ComputeCard_t
+ { // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
+ unsigned Component : 5; // when BGP_UCI_Component_ComputeCard
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+ unsigned ComputeCard : 6; // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+ unsigned _zero : 8; // zero's
+ }
+ BGP_UCI_ComputeCard_t;
+
+#define BGP_UCI_COMPUTECARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_ComputeCard
+#define BGP_UCI_COMPUTECARD_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_COMPUTECARD_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_COMPUTECARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_COMPUTECARD_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_COMPUTECARD_COMPUTECARD(x) _B6(23,x) // 04..35 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+
+
+typedef struct BGP_UCI_IOCard_t
+ { // "Rxy-Mm-Nnn-Jxx": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>
+ unsigned Component : 5; // when BGP_UCI_Component_IOCard
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+ unsigned ComputeCard : 6; // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+ unsigned _zero : 8; // zero's
+ }
+ BGP_UCI_IOCard_t;
+
+#define BGP_UCI_IOCARD_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_IOCard
+#define BGP_UCI_IOCARD_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_IOCARD_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_IOCARD_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_IOCARD_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_IOCARD_COMPUTECARD(x) _B6(23,x) // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+
+
+
+typedef struct BGP_UCI_DDRChip_t
+ { // "Rxy-Mm-Nnn-Jxx-Uuu": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-J<ComputeCard>-U<DDRChip>
+ unsigned Component : 5; // when BGP_UCI_Component_DDRChip
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+ unsigned ComputeCard : 6; // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+ unsigned DDRChip : 5; // 00..20
+ unsigned _zero : 3; // zero's
+ }
+ BGP_UCI_DDRChip_t;
+
+#define BGP_UCI_DDRCHIP_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_DDRChip
+#define BGP_UCI_DDRCHIP_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_DDRCHIP_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_DDRCHIP_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_DDRCHIP_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_DDRCHIP_COMPUTECARD(x) _B6(23,x) // 00..01 (00-01 IOCard, 02-03 Reserved, 04-35 ComputeCard)
+#define BGP_UCI_DDRCHIP_DDRCHIP(x) _B5(28,x) // 00..20
+
+
+typedef struct BGP_UCI_ENetConnector_t
+ { // "Rxy-Mm-Nnn-ENe": R<RackRow><RackColumn>-M<Midplane>-N<NodeCard>-EN<EN>
+ unsigned Component : 5; // when BGP_UCI_Component_ENetConnector
+ unsigned RackRow : 4; // 0..F
+ unsigned RackColumn : 4; // 0..F
+ unsigned Midplane : 1; // 0=Bottom, 1=Top
+ unsigned NodeCard : 4; // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+ unsigned EN : 1; // 0..1 (Equal to IOCard number)
+ unsigned _zero : 13; // zero's
+ }
+ BGP_UCI_ENetConnector_t;
+
+#define BGP_UCI_ENETCONNECTOR_COMPONENT(x) _B5( 4,x) // when BGP_UCI_Component_ENetConnector
+#define BGP_UCI_ENETCONNECTOR_RACKROW(x) _B4( 8,x) // 0..F
+#define BGP_UCI_ENETCONNECTOR_RACKCOLUMN(x) _B4(12,x) // 0..F
+#define BGP_UCI_ENETCONNECTOR_MIDPLANE(x) _B1(13,x) // 0=Bottom, 1=Top
+#define BGP_UCI_ENETCONNECTOR_NODECARD(x) _B4(17,x) // 00..15: 00=BF, 07=TF, 08=BR, 15=TR)
+#define BGP_UCI_ENETCONNECTOR_ENETCONNECTOR(x) _B1(18,x) // 0..1 (Equal to IOCard number)
+
+
+
+typedef union TBGP_UniversalComponentIdentifier
+ {
+ uint32_t UCI;
+ BGP_UCI_Rack_t Rack;
+ BGP_UCI_Midplane_t Midplane;
+ BGP_UCI_BulkPowerSupply_t BulkPowerSupply;
+ BGP_UCI_PowerCable_t PowerCable;
+ BGP_UCI_PowerModule_t PowerModule;
+ BGP_UCI_ClockCard_t ClockCard;
+ BGP_UCI_FanAssembly_t FanAssembly;
+ BGP_UCI_Fan_t Fan;
+ BGP_UCI_ServiceCard_t ServiceCard;
+ BGP_UCI_LinkCard_t LinkCard;
+ BGP_UCI_LinkChip_t LinkChip;
+ BGP_UCI_LinkPort_t LinkPort;
+ BGP_UCI_NodeCard_t NodeCard;
+ BGP_UCI_ComputeCard_t ComputeCard;
+ BGP_UCI_IOCard_t IOCard;
+ BGP_UCI_DDRChip_t DDRChip;
+ BGP_UCI_ENetConnector_t ENetConnector;
+ }
+ BGP_UniversalComponentIdentifier;
+
+
+
+#define BGP_PERSONALITY_VERSION (0x0A)
+
+#define BGP_DEFAULT_FREQ (850)
+
+#define BGP_PERS_PROCESSCONFIG_DIAGS (0xFF000000) // Diagnostic Mode: All Cores Enabled and Privileged in Process 0
+#define BGP_PERS_PROCESSCONFIG_SMP (0x0F000000) // All Cores Enabled User-Space in Process 0
+#define BGP_PERS_PROCESSCONFIG_VNM (0x08040201) // 4 Single-Core Processes (a.k.a. Virtual Nodes)
+#define BGP_PERS_PROCESSCONFIG_2x2 (0x0C030000) // 2 Processes of 2 Cores each in same DP unit
+#define BGP_PERS_PROCESSCONFIG_2x2_CROSS1 (0x09060000) // 2 Processes of 2 Cores in different DP units
+#define BGP_PERS_PROCESSCONFIG_2x2_CROSS2 (0x0A050000) // 2 Processes of 2 Cores in different DP units
+#define BGP_PERS_PROCESSCONFIG_3PLUS1 (0x0E010000) // 3 Cores in one Processes, 4th Core in Separate Process
+#define BGP_PERS_PROCESSCONFIG_DEFAULT (BGP_PERS_PROCESSCONFIG_DIAGS)
+
+
+// Personality.Kernel_Config.RASPolicy
+#define BGP_PERS_RASPOLICY_VERBOSITY(x) _B2( 1,x) // Verbosity as shown below
+#define BGP_PERS_RASPOLICY_MINIMAL BGP_PERS_RASPOLICY_VERBOSITY(0) // Benchmarking Level of Capture and Reporting
+#define BGP_PERS_RASPOLICY_NORMAL BGP_PERS_RASPOLICY_VERBOSITY(1) // Normal Production Level of Capture and Reporting
+#define BGP_PERS_RASPOLICY_VERBOSE BGP_PERS_RASPOLICY_VERBOSITY(2) // Manufacturing Test and Diagnostics
+#define BGP_PERS_RASPOLICY_EXTREME BGP_PERS_RASPOLICY_VERBOSITY(3) // Report Every Event Immediately - Thresholds set to 1
+#define BGP_PERS_RASPOLICY_FATALEXIT _BN( 2) // Fatal is Fatal, so exit.
+
+#define BGP_PERS_RASPOLICY_DEFAULT (BGP_PERS_RASPOLICY_VERBOSE | BGP_PERS_RASPOLICY_FATALEXIT)
+
+
+#define BGP_PERSONALITY_LEN_NFSDIR (32) // 32bytes
+
+#define BGP_PERSONALITY_LEN_SECKEY (32) // 32bytes
+
+// Personality.NodeConfig Driver Enables and Configurations
+#define BGP_PERS_ENABLE_Simulation _BN( 0) // Running on VHDL Simulation
+#define BGP_PERS_ENABLE_LockBox _BN( 1)
+#define BGP_PERS_ENABLE_BIC _BN( 2)
+#define BGP_PERS_ENABLE_DDR _BN( 3) // DDR Controllers (not Fusion DDR model)
+#define BGP_PERS_ENABLE_LoopBack _BN( 4) // LoopBack: Internal TS/TR or SerDes Loopback
+#define BGP_PERS_ENABLE_GlobalInts _BN( 5)
+#define BGP_PERS_ENABLE_Collective _BN( 6) // Enable Collective Network
+#define BGP_PERS_ENABLE_Torus _BN( 7)
+#define BGP_PERS_ENABLE_TorusMeshX _BN( 8) // Torus is a Mesh in the X-dimension
+#define BGP_PERS_ENABLE_TorusMeshY _BN( 9) // Torus is a Mesh in the Y-dimension
+#define BGP_PERS_ENABLE_TorusMeshZ _BN(10) // Torus is a Mesh in the Z-dimension
+#define BGP_PERS_ENABLE_TreeA _BN(11) // Enable Collective Network A-link
+#define BGP_PERS_ENABLE_TreeB _BN(12) // Enable Collective Network B-link
+#define BGP_PERS_ENABLE_TreeC _BN(13) // Enable Collective Network C-link
+#define BGP_PERS_ENABLE_DMA _BN(14)
+#define BGP_PERS_ENABLE_SerDes _BN(15)
+#define BGP_PERS_ENABLE_UPC _BN(16)
+#define BGP_PERS_ENABLE_EnvMon _BN(17)
+#define BGP_PERS_ENABLE_Ethernet _BN(18)
+#define BGP_PERS_ENABLE_JTagLoader _BN(19) // Converse with JTag Host to load kernel
+#define BGP_PERS_ENABLE_MailBoxReceive BGP_PERS_ENABLE_JTagLoader
+#define BGP_PERS_ENABLE_PowerSave _BN(20) // Turn off unused devices (Eth on CN, TS on ION)
+#define BGP_PERS_ENABLE_FPU _BN(21) // Enable Double-Hummers (not supported in EventSim)
+#define BGP_PERS_ENABLE_StandAlone _BN(22) // Disable "CIOD" interface, Requires Collective!
+#define BGP_PERS_ENABLE_TLBMisses _BN(23) // TLB Misses vs Wasting Memory (see bgp_AppSetup.c)
+#define BGP_PERS_ENABLE_Mambo _BN(24) // Running under Mambo? Used by Linux
+#define BGP_PERS_ENABLE_TreeBlast _BN(25) // Enable Tree "Blast" mode
+#define BGP_PERS_ENABLE_BlindStacks _BN(26) // For "XB" Tests, Lock 16K Stacks in Blind Device
+#define BGP_PERS_ENABLE_CNK_Malloc _BN(27) // Enable Malloc Support in CNK.
+#define BGP_PERS_ENABLE_Reproducibility _BN(28) // Enable Cycle Reproducibility
+#define BGP_PERS_ENABLE_HighThroughput _BN(29) // Enable high throughput computing mode
+#define BGP_PERS_ENABLE_DiagnosticsMode _BN(30) // Enable diagnostics mode
+
+// Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back)
+// This overrides most L1, L2, and Snoop settings. Careful!
+#define BGP_PERS_ENABLE_BGLMODE _BN(31) // (not yet fully implemented)
+
+// Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave
+#define BGP_PERS_NODECONFIG_DEFAULT (BGP_PERS_ENABLE_Simulation |\
+ BGP_PERS_ENABLE_LockBox |\
+ BGP_PERS_ENABLE_BIC |\
+ BGP_PERS_ENABLE_DDR |\
+ BGP_PERS_ENABLE_LoopBack |\
+ BGP_PERS_ENABLE_GlobalInts |\
+ BGP_PERS_ENABLE_Collective |\
+ BGP_PERS_ENABLE_Torus |\
+ BGP_PERS_ENABLE_UPC |\
+ BGP_PERS_ENABLE_EnvMon |\
+ BGP_PERS_ENABLE_FPU |\
+ BGP_PERS_ENABLE_StandAlone)
+
+// Default Setup for Hardware:
+// Supports Stand-Alone CNA Applications.
+// Bootloader-Extensions and XB's must turn-off JTagLoader
+#define BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (BGP_PERS_ENABLE_JTagLoader |\
+ BGP_PERS_ENABLE_LockBox |\
+ BGP_PERS_ENABLE_BIC |\
+ BGP_PERS_ENABLE_DDR |\
+ BGP_PERS_ENABLE_GlobalInts |\
+ BGP_PERS_ENABLE_Collective |\
+ BGP_PERS_ENABLE_SerDes |\
+ BGP_PERS_ENABLE_UPC |\
+ BGP_PERS_ENABLE_EnvMon |\
+ BGP_PERS_ENABLE_FPU |\
+ BGP_PERS_ENABLE_StandAlone)
+
+// these fields are defined by the control system depending on compute/io node
+// BGP_PERS_ENABLE_Torus |
+// BGP_PERS_ENABLE_TorusMeshX |
+// BGP_PERS_ENABLE_TorusMeshY |
+// BGP_PERS_ENABLE_TorusMeshZ |
+
+
+
+// Personality.L1Config: Controls and Settings for L1 Cache
+#define BGP_PERS_L1CONFIG_L1I _BN( 0) // L1 Enabled for Instructions
+#define BGP_PERS_L1CONFIG_L1D _BN( 1) // L1 Enabled for Data
+#define BGP_PERS_L1CONFIG_L1SWOA _BN( 2) // L1 Store WithOut Allocate
+#define BGP_PERS_L1CONFIG_L1Recovery _BN( 3) // L1 Full Recovery Mode
+#define BGP_PERS_L1CONFIG_L1WriteThru _BN( 4) // L1 Write-Thru (not svc_host changeable (yet?))
+#define BGP_PERS_L1CONFIG_DO_L1ITrans _BN( 5) // Enable L1 Instructions Transient?
+#define BGP_PERS_L1CONFIG_DO_L1DTrans _BN( 6) // Enable L1 Data Transient?
+ // unused 9bits: 7..15
+#define BGP_PERS_L1CONFIG_L1ITrans(x) _B8(23,x) // L1 Transient for Instructions in Groups of 16 Lines
+#define BGP_PERS_L1CONFIG_L1DTrans(x) _B8(31,x) // L1 Transient for Data in Groups of 16 Lines
+
+#define BGP_PERS_L1CONFIG_DEFAULT (BGP_PERS_L1CONFIG_L1I |\
+ BGP_PERS_L1CONFIG_L1D |\
+ BGP_PERS_L1CONFIG_L1SWOA |\
+ BGP_PERS_L1CONFIG_L1Recovery |\
+ BGP_PERS_L1CONFIG_L1WriteThru)
+
+typedef union TBGP_Pers_L1Cfg
+ {
+ uint32_t l1cfg;
+ struct {
+ unsigned l1i : 1;
+ unsigned l1d : 1;
+ unsigned l1swoa : 1;
+ unsigned l1recovery : 1;
+ unsigned l1writethru : 1;
+ unsigned do_l1itrans : 1;
+ unsigned do_l1dtrans : 1;
+ unsigned l1rsvd : 9;
+ unsigned l1itrans : 8;
+ unsigned l1dtrans : 8;
+ };
+ }
+ BGP_Pers_L1Cfg;
+
+// Personality.L2Config: Controls and Settings for L2 and Snoop
+#define BGP_PERS_L2CONFIG_L2I _BN( 0) // L2 Instruction Caching Enabled
+#define BGP_PERS_L2CONFIG_L2D _BN( 1) // L2 Data Caching Enabled
+#define BGP_PERS_L2CONFIG_L2PF _BN( 2) // L2 Automatic Prefetching Enabled
+#define BGP_PERS_L2CONFIG_L2PFO _BN( 3) // L2 Optimistic Prefetching Enabled
+#define BGP_PERS_L2CONFIG_L2PFA _BN( 4) // L2 Aggressive Prefetching Enabled (fewer deeper streams)
+#define BGP_PERS_L2CONFIG_L2PFS _BN( 5) // L2 Aggressive Many-Stream Prefetching Enabled (deeper only when available buffers)
+#define BGP_PERS_L2CONFIG_Snoop _BN( 6) // Just NULL Snoop Filter
+#define BGP_PERS_L2CONFIG_SnoopCache _BN( 7) // Snoop Caches
+#define BGP_PERS_L2CONFIG_SnoopStream _BN( 8) // Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata)
+#define BGP_PERS_L2CONFIG_SnoopRange _BN( 9) // Snoop Range Filter when possible
+#define BGP_PERS_L2CONFIG_BUG824LUMPY _BN(10) // BPC_BUGS 824: Fix with Lumpy Performance
+#define BGP_PERS_L2CONFIG_BUG824SMOOTH _BN(11) // BPC_BUGS 824: Fix with Smooth Performance, but -12% Memory
+#define BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12) // Special for Snoop diagnostics. See bgp_vmm.c
+ // additional bits may be used for Snoop setting tweaks
+
+// Default L2 Configuration:
+// L2 Enabled with Multi-Stream Aggressive Prefetching
+// Snoop Enabled with all filters except Range
+#define BGP_PERS_L2CONFIG_DEFAULT (BGP_PERS_L2CONFIG_L2I |\
+ BGP_PERS_L2CONFIG_L2D |\
+ BGP_PERS_L2CONFIG_L2PF |\
+ BGP_PERS_L2CONFIG_L2PFO |\
+ BGP_PERS_L2CONFIG_L2PFS |\
+ BGP_PERS_L2CONFIG_Snoop |\
+ BGP_PERS_L2CONFIG_SnoopCache |\
+ BGP_PERS_L2CONFIG_SnoopStream|\
+ BGP_PERS_L2CONFIG_BUG824LUMPY)
+
+
+// Personality.L3Config: Controls and Settings for L3
+// Note: Most bits match BGP_L3x_CTRL DCRs.
+// See arch/include/bpcore/bgl_l3_dcr.h
+#define BGP_PERS_L3CONFIG_L3I _BN( 0) // L3 Enabled for Instructions
+#define BGP_PERS_L3CONFIG_L3D _BN( 1) // L3 Enabled for Data
+#define BGP_PERS_L3CONFIG_L3PFI _BN( 2) // Inhibit L3 Prefetch from DDR
+#define BGP_PERS_L3CONFIG_DO_Scratch _BN( 3) // Set up Scratch?
+#define BGP_PERS_L3CONFIG_DO_PFD0 _BN( 4) // Adjust PFD0?
+#define BGP_PERS_L3CONFIG_DO_PFD1 _BN( 5) // Adjust PFD1?
+#define BGP_PERS_L3CONFIG_DO_PFDMA _BN( 6) // Adjust PFDMA?
+#define BGP_PERS_L3CONFIG_DO_PFQD _BN( 7) // Adjust PFQD?
+ // 8..15 unused/available
+#define BGP_PERS_L3CONFIG_Scratch(x) _B4(19,x) // Scratch 8ths: 0..8
+#define BGP_PERS_L3CONFIG_PFD0(x) _B3(22,x) // Prefetch Depth for DP0
+#define BGP_PERS_L3CONFIG_PFD1(x) _B3(25,x) // Prefetch Depth for DP1
+#define BGP_PERS_L3CONFIG_PFDMA(x) _B3(28,x) // Prefetch Depth for DMA
+#define BGP_PERS_L3CONFIG_PFQD(x) _B3(31,x) // Prefetch Queue Depth
+
+// General L3 Configuration
+typedef union TBGP_Pers_L3Cfg
+ {
+ uint32_t l3cfg;
+ struct {
+ unsigned l3i : 1;
+ unsigned l3d : 1;
+ unsigned l3pfi : 1;
+ unsigned do_scratch : 1;
+ unsigned do_pfd0 : 1;
+ unsigned do_pfd1 : 1;
+ unsigned do_pfdma : 1;
+ unsigned do_pfqd : 1;
+ unsigned rsvd : 8;
+ unsigned scratch : 4;
+ unsigned pfd0 : 3;
+ unsigned pfd1 : 3;
+ unsigned pfdma : 3;
+ unsigned pfqd : 3;
+ };
+ }
+ BGP_Pers_L3Cfg;
+
+// Default L3 Configuration:
+// L3 Enabled for Instructions and Data
+// No Prefetch Depth overrides, No Scratch, No Scrambling.
+#define BGP_PERS_L3CONFIG_DEFAULT (BGP_PERS_L3CONFIG_L3I |\
+ BGP_PERS_L3CONFIG_L3D |\
+ BGP_PERS_L3CONFIG_DO_PFDMA |\
+ BGP_PERS_L3CONFIG_PFDMA(4))
+
+
+// L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users)
+#define BGP_PERS_L3SELECT_DO_CacheSel _BN( 0) // Adjust Cache Select setting?
+#define BGP_PERS_L3SELECT_DO_BankSel _BN( 1) // Adjust Bank Select setting?
+#define BGP_PERS_L3SELECT_Scramble _BN( 2) // L3 Scramble
+#define BGP_PERS_L3SELECT_PFby2 _BN( 3) // Prefetch by 2 if set, else by 1 (default) if clear.
+#define BGP_PERS_L3SELECT_CacheSel(x) _B5( 8,x) // PhysAddr Bit for L3 Selection (0..26)
+#define BGP_PERS_L3SELECT_BankSel(x) _B5(13,x) // PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel.
+
+typedef union TBGP_Pers_L3Select
+ {
+ uint32_t l3select;
+ struct {
+ unsigned do_CacheSel : 1;
+ unsigned do_BankSel : 1;
+ unsigned l3Scramble : 1;
+ unsigned l3_PF_by2 : 1; // default is PreFetch by 1.
+ unsigned CacheSel : 5; // Physical Address Bit for L3 Selection (0..26)
+ unsigned BankSel : 5; // 0..26 Must be strictly greater than CacheSel.
+ unsigned rsvd : 18;
+ };
+ }
+ BGP_Pers_L3Select;
+
+// Default L3 Selection Configuration: Disable overrides, but set h/w default values.
+#define BGP_PERS_L3SELECT_DEFAULT (BGP_PERS_L3SELECT_CacheSel(21) |\
+ BGP_PERS_L3SELECT_BankSel(26))
+
+// Tracing Masks and default trace configuration
+#define BGP_TRACE_CONFIG _BN( 0) // Display Encoded personality config on startup
+#define BGP_TRACE_ENTRY _BN( 1) // Function enter and exit
+#define BGP_TRACE_INTS _BN( 2) // Standard Interrupt Dispatch
+#define BGP_TRACE_CINTS _BN( 3) // Critical Interrupt Dispatch
+#define BGP_TRACE_MCHK _BN( 4) // Machine Check Dispatch
+#define BGP_TRACE_SYSCALL _BN( 5) // System Calls
+#define BGP_TRACE_VMM _BN( 6) // Virtual Memory Manager
+#define BGP_TRACE_DEBUG _BN( 7) // Debug Events (app crashes etc)
+#define BGP_TRACE_TORUS _BN( 8) // Torus Init
+#define BGP_TRACE_TREE _BN( 9) // Tree Init
+#define BGP_TRACE_GLOBINT _BN(10) // Global Interrupts
+#define BGP_TRACE_DMA _BN(11) // DMA Setup
+#define BGP_TRACE_SERDES _BN(12) // SerDes Init
+#define BGP_TRACE_TESTINT _BN(13) // Test Interface, ECID, Config
+#define BGP_TRACE_ETHTX _BN(14) // Ethernet Transmit
+#define BGP_TRACE_ETHRX _BN(15) // Ethernet Receive
+#define BGP_TRACE_POWER _BN(16) // Power Control
+#define BGP_TRACE_PROCESS _BN(17) // Process/Thread Mapping
+#define BGP_TRACE_EXIT_SUM _BN(18) // Report Per-Core Interrupt and Error Summary on exit()
+#define BGP_TRACE_SCHED _BN(19) // Report Scheduler Information
+#define BGP_TRACE_RAS _BN(20) // Report RAS Events (in addition to sending to Host)
+#define BGP_TRACE_ECID _BN(21) // Report UCI and ECID on boot
+#define BGP_TRACE_FUTEX _BN(22) // Trace Futex operations
+#define BGP_TRACE_MemAlloc _BN(23) // Trace MMAP and Shared Memory operations
+#define BGP_TRACE_WARNINGS _BN(30) // Trace Warnings
+#define BGP_TRACE_VERBOSE _BN(31) // Verbose Tracing Modifier
+
+// Enable tracking of Regression Suite coverage and report UCI+ECID on boot
+#define BGP_PERS_TRACE_DEFAULT (BGP_TRACE_CONFIG | BGP_TRACE_ECID)
+
+
+typedef struct BGP_Personality_Kernel_t
+ {
+ uint32_t UniversalComponentIdentifier; // see include/common/bgp_ras.h
+
+ uint32_t FreqMHz; // Clock_X1 Frequency in MegaHertz (eg 1000)
+
+ uint32_t RASPolicy; // Verbosity level, and other RAS Reporting Controls
+
+ // Process Config:
+ // Each byte represents a process (1 to 4 processes supported)
+ // No core can be assigned to more than 1 process.
+ // Cores assigned to no process are disabled.
+  //      Cores within a process share the same address space.
+ // Separate processes have distinct address spaces.
+ // Within each process (0 to 4 cores assigned to a process):
+ // Lower nibble is bitmask of which core belongs to that process.
+ // Upper nibble is bitmask whether that thread is privileged or user.
+ // Processes with zero cores do not exist.
+ // E.g., for Diagnostics, we use 0xFF000000, which means
+ // that all 4 cores run privileged in process 0.
+ uint32_t ProcessConfig;
+
+ uint32_t TraceConfig; // Kernel Tracing Enables
+ uint32_t NodeConfig; // Kernel Driver Enables
+ uint32_t L1Config; // L1 Config and setup controls
+ uint32_t L2Config; // L2 and Snoop Config and setup controls
+ uint32_t L3Config; // L3 Config and setup controls
+ uint32_t L3Select; // L3 Cache and Bank Selection controls
+
+ uint32_t SharedMemMB; // Memory to Reserve for Sharing among Processes
+
+ uint32_t ClockStop0; // Upper 11Bits of ClockStop, enabled if Non-zero
+ uint32_t ClockStop1; // Lower 32Bits of ClockStop, enabled if Non-zero
+ }
+ BGP_Personality_Kernel_t;
+
+
+// Defaults for DDR Config
+#define BGP_PERS_DDR_PBX0_DEFAULT (0x411D1512) // PBX DCRs setting (in IBM bit numbering)
+#define BGP_PERS_DDR_PBX1_DEFAULT (0x40000000) // PBX DCRs setting (in IBM bit numbering)
+#define BGP_PERS_DDR_MemConfig0_DEFAULT (0x81fc4080) // MemConfig
+#define BGP_PERS_DDR_MemConfig1_DEFAULT (0x0C0ff800) // MemConfig
+#define BGP_PERS_DDR_ParmCtl0_DEFAULT (0x3216c008) // Parm Control
+#define BGP_PERS_DDR_ParmCtl1_DEFAULT (0x4168c323) // Parm Control
+#define BGP_PERS_DDR_MiscCtl0_DEFAULT (0) // Misc. Control
+#define BGP_PERS_DDR_MiscCtl1_DEFAULT (0) // Misc. Control
+#define BGP_PERS_DDR_CmdBufMode0_DEFAULT (0x00400fdf) // Command Buffer Mode
+#define BGP_PERS_DDR_CmdBufMode1_DEFAULT (0xffc80600) // Command Buffer Mode
+#define BGP_PERS_DDR_RefrInterval0_DEFAULT (0xD1000002) // Refresh Interval
+#define BGP_PERS_DDR_RefrInterval1_DEFAULT (0x04000000) // Refresh Interval
+#define BGP_PERS_DDR_ODTCtl0_DEFAULT (0) // ODT Control
+#define BGP_PERS_DDR_ODTCtl1_DEFAULT (0) // ODT Control
+#define BGP_PERS_DDR_DataStrobeCalib0_DEFAULT (0x08028a64) // Data Strobe Calibration
+#define BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805) // Data Strobe Calibration
+#define BGP_PERS_DDR_DQSCtl_DEFAULT (0x00000168) // DQS Control
+#define BGP_PERS_DDR_Throttle_DEFAULT (0) // DDR Throttle
+//1#define BGP_PERS_DDR_DDRSizeMB_DEFAULT (4096) // Total DDR size in MegaBytes (512MB - 16384MB).
+#define BGP_PERS_DDR_DDRSizeMB_DEFAULT (1024) // Total DDR size in MegaBytes (512MB - 16384MB).
+//1#define BGP_PERS_DDR_Chips_DEFAULT (0x0B) // Type of DDR chips
+#define BGP_PERS_DDR_Chips_DEFAULT (0x09) // Type of DDR chips
+#define BGP_PERS_DDR_CAS_DEFAULT (4) // CAS Latency (3, 4, or 5)
+
+
+#define BGP_PERS_DDRFLAGS_ENABLE_Scrub _BN(0) // Enable DDR Slow Scrub when 1
+
+// DDRFLAGS default: Enable Slow Scrub.
+#define BGP_PERS_DDRFLAGS_DEFAULT (BGP_PERS_DDRFLAGS_ENABLE_Scrub)
+
+#define BGP_PERS_SRBS0_DEFAULT (0)
+#define BGP_PERS_SRBS1_DEFAULT (0)
+
+typedef struct BGP_Personality_DDR_t
+ {
+ uint32_t DDRFlags; // Misc. Flags and Settings
+ uint32_t SRBS0; // Controller 0 SRBS/CK Settings
+ uint32_t SRBS1; // Controller 1 SRBS/CK Settings
+ uint32_t PBX0; // PBX DCRs setting (in IBM bit numbering)
+ uint32_t PBX1; // PBX DCRs setting (in IBM bit numbering)
+ uint32_t MemConfig0; // MemConfig
+ uint32_t MemConfig1; // MemConfig
+ uint32_t ParmCtl0; // Parm Control
+ uint32_t ParmCtl1; // Parm Control
+ uint32_t MiscCtl0; // Misc. Control
+ uint32_t MiscCtl1; // Misc. Control
+ uint32_t CmdBufMode0; // Command Buffer Mode
+ uint32_t CmdBufMode1; // Command Buffer Mode
+ uint32_t RefrInterval0; // Refresh Interval
+ uint32_t RefrInterval1; // Refresh Interval
+ uint32_t ODTCtl0; // ODT Control
+ uint32_t ODTCtl1; // ODT Control
+ uint32_t DataStrobeCalib0; // Data Strobe Calibration
+ uint32_t DataStrobeCalib1; // Data Strobe Calibration
+ uint32_t DQSCtl; // DQS Control
+ uint32_t Throttle; // DDR Throttle
+ uint16_t DDRSizeMB; // Total DDR size in MegaBytes (512MB - 16384MB).
+ uint8_t Chips; // Type of DDR chips
+ uint8_t CAS; // CAS Latency (3, 4, or 5)
+ }
+ BGP_Personality_DDR_t;
+
+
+typedef struct BGP_Personality_Networks_t
+ {
+ uint32_t BlockID; // a.k.a. PartitionID
+
+ uint8_t Xnodes,
+ Ynodes,
+ Znodes,
+ Xcoord,
+ Ycoord,
+ Zcoord;
+
+ // PSet Support
+ uint16_t PSetNum;
+ uint32_t PSetSize;
+ uint32_t RankInPSet;
+
+ uint32_t IOnodes;
+ uint32_t Rank; // Rank in Block (or Partition)
+ uint32_t IOnodeRank; // Rank (and therefore P2P Addr) of my I/O Node
+ uint16_t TreeRoutes[ 16 ];
+ }
+ BGP_Personality_Networks_t;
+
+
+typedef struct BGP_IP_Addr_t
+ {
+ // IPv6 Addresses are 16 bytes, where the
+ // lower 4 (indices 12-15) can be used for IPv4 address.
+ uint8_t octet[ 16 ];
+ }
+ BGP_IP_Addr_t;
+
+
+typedef struct BGP_Personality_Ethernet_t
+ {
+ uint16_t MTU; // Initial emac MTU size
+ uint8_t EmacID[6]; // MAC address for emac
+ BGP_IP_Addr_t IPAddress; // IPv6/IPv4 address of this node
+ BGP_IP_Addr_t IPNetmask; // IPv6/IPv4 netmask
+ BGP_IP_Addr_t IPBroadcast; // IPv6/IPv4 broadcast address
+ BGP_IP_Addr_t IPGateway; // IPv6/IPv4 initial gateway (zero if none)
+ BGP_IP_Addr_t NFSServer; // IPv6/IPv4 NFS system software server address
+ BGP_IP_Addr_t serviceNode; // IPv6/IPv4 address of service node
+
+ // NFS mount info
+ char NFSExportDir[BGP_PERSONALITY_LEN_NFSDIR];
+ char NFSMountDir[BGP_PERSONALITY_LEN_NFSDIR];
+
+ // Security Key for Service Node authentication
+ uint8_t SecurityKey[BGP_PERSONALITY_LEN_SECKEY ];
+ }
+ BGP_Personality_Ethernet_t;
+
+
+
+#define BGP_PERS_BLKCFG_IPOverCollective _BN(31)
+#define BGP_PERS_BLKCFG_IPOverTorus _BN(30)
+#define BGP_PERS_BLKCFG_IPOverCollectiveVC _BN(29)
+#define BGP_PERS_BLKCFG_CIOModeSel(x) _B2(28,x)
+#define BGP_PERS_BLKCFG_bgsysFSSel(x) _B3(26,x)
+#define BGP_PERS_BLKCFG_CIOMode_Full 0
+#define BGP_PERS_BLKCFG_CIOMode_MuxOnly 1
+#define BGP_PERS_BLKCFG_CIOMode_None 2
+#define BGP_PERS_BLKCFG_bgsys_NFSv3 0
+#define BGP_PERS_BLKCFG_bgsys_NFSv4 1
+#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \
+ BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3))
+
+typedef struct TBGP_Personality_t
+ {
+ uint16_t CRC;
+ uint8_t Version;
+ uint8_t PersonalitySizeWords;
+
+ BGP_Personality_Kernel_t Kernel_Config;
+
+ BGP_Personality_DDR_t DDR_Config;
+
+ BGP_Personality_Networks_t Network_Config;
+
+ BGP_Personality_Ethernet_t Ethernet_Config;
+
+ uint8_t Block_Config;
+ uint8_t padd[7]; // Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr)
+ // to simplify jtag operations. See issue #140.
+ }
+ BGP_Personality_t;
+
+
+// Define a static initializer for default configuration. (DEFAULTS FOR SIMULATION)
+// This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c
+#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \
+ 0, /* CRC */ \
+ BGP_PERSONALITY_VERSION, /* Version */ \
+ (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+ { /* BGP_Personality_Kernel_t: */ \
+           0,                            /* UniversalComponentIdentifier */     \
+ BGP_DEFAULT_FREQ, /* FreqMHz */ \
+ BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \
+ BGP_PERS_PROCESSCONFIG_DEFAULT, /* ProcessConfig */ \
+ BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \
+ BGP_PERS_NODECONFIG_DEFAULT, /* NodeConfig */ \
+ BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \
+ BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \
+ BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \
+ BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \
+ 0, /* SharedMemMB */ \
+ 0, /* ClockStop0 */ \
+ 0 /* ClockStop1 */ \
+ }, \
+ { /* BGP_Personality_DDR_t: */ \
+ BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \
+ BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \
+ BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \
+ BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \
+ BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \
+ BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \
+ BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \
+ BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \
+ BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \
+ BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \
+ BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \
+ BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \
+ BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \
+ BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \
+ BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \
+ BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \
+ BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \
+ BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \
+ BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+ BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \
+ BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \
+ BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \
+ BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \
+ BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \
+ }, \
+ { /* BGP_Personality_Networks_t: */ \
+ 0, /* BlockID */ \
+ 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \
+ 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \
+ 0, /* PSetNum */ \
+ 0, /* PSetSize */ \
+ 0, /* RankInPSet */ \
+ 0, /* IOnodes */ \
+ 0, /* Rank */ \
+ 0, /* IOnodeRank */ \
+ { 0, } /* TreeRoutes[ 16 ] */ \
+ }, \
+ { /* BGP_Personality_Ethernet_t: */ \
+ 1536, /* mtu */ \
+ { 0, }, /* EmacID[6] */ \
+ { { 0x00,0x00,0x00,0x00, /* IPAddress */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0xFF,0xFF,0xFF,0x70 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPGateway */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* NFSServer */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* serviceNode */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ "", /* NFSExportDir[32] */ \
+ "", /* NFSMountDir[32] */ \
+ { 0x00, } /* SecurityKey[32] */ \
+ }, \
+ 0, /* Block_Config */ \
+ { 0, } /* padd[7] */ \
+ }
+
+
+// Define a static initializer for default configuration. (DEFAULTS FOR HARDWARE)
+// This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c
+#define BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \
+ 0, /* CRC */ \
+ BGP_PERSONALITY_VERSION, /* Version */ \
+ (sizeof(BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+ { /* BGP_Personality_Kernel_t: */ \
+           0,                            /* UniversalComponentIdentifier */     \
+ BGP_DEFAULT_FREQ, /* FreqMHz */ \
+ BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \
+ BGP_PERS_PROCESSCONFIG_SMP, /* ProcessConfig */ \
+ BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \
+ BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE, /* NodeConfig */ \
+ BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \
+ BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \
+ BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \
+ BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \
+ 0, /* SharedMemMB */ \
+ 0, /* ClockStop0 */ \
+ 0 /* ClockStop1 */ \
+ }, \
+ { /* BGP_Personality_DDR_t: */ \
+ BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \
+ BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \
+ BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \
+ BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \
+ BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \
+ BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \
+ BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \
+ BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \
+ BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \
+ BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \
+ BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \
+ BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \
+ BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \
+ BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \
+ BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \
+ BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \
+ BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \
+ BGP_PERS_DDR_DataStrobeCalib0_DEFAULT, /* DataStrobeCalib0 */ \
+ BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+ BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \
+ BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \
+ BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \
+ BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \
+ BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \
+ }, \
+ { /* BGP_Personality_Networks_t: */ \
+ 0, /* BlockID */ \
+ 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \
+ 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \
+ 0, /* PSetNum */ \
+ 0, /* PSetSize */ \
+ 0, /* RankInPSet */ \
+ 0, /* IOnodes */ \
+ 0, /* Rank */ \
+ 0, /* IOnodeRank */ \
+ { 0, } /* TreeRoutes[ 16 ] */ \
+ }, \
+ { /* BGP_Personality_Ethernet_t: */ \
+ 1536, /* mtu */ \
+ { 0, }, /* EmacID[6] */ \
+ { { 0x00,0x00,0x00,0x00, /* IPAddress */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0xFF,0xFF,0xFF,0x70 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPGateway */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* NFSServer */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* serviceNode */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ "", /* NFSExportDir[32] */ \
+ "", /* NFSMountDir[32] */ \
+ { 0x00, } /* SecurityKey[32] */ \
+ }, \
+ 0, /* Block_Config */ \
+ { 0, } /* padd[7] */ \
+ }
+
+
+
+
+#endif // Add nothing below this line.
diff --git a/arch/powerpc/include/asm/bluegene.h b/arch/powerpc/include/asm/bluegene.h
new file mode 100644
index 00000000000000..d7b98e2a27e171
--- /dev/null
+++ b/arch/powerpc/include/asm/bluegene.h
@@ -0,0 +1,71 @@
+/*
+ * Blue Gene board definitions
+ *
+ * Todd Inglett <tinglett@us.ibm.com>
+ *
+ * Copyright 2005, 2007, 2009 International Business Machines, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef __ASM_BLUEGENE_H__
+#define __ASM_BLUEGENE_H__
+
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+
+void __init bgp_init_cns(void);
+void bgp_udbg_putc(char c);
+unsigned int bgp_get_irq(void);
+void bgp_send_ipi(int cpu, int msg);
+void bgp_init_IPI(int cpu, int msg);
+void __init bgp_init_IRQ(void);
+
+/* Interrupt encoding for Blue Gene/P hardware.
+ * Given a BIC group and bit index within the group,
+ * bic_hw_to_irq(group, gint) returns the Linux IRQ number.
+ */
+static inline unsigned bic_hw_to_irq(unsigned group, unsigned gint)
+{
+ return ((group+1) << 5) | (gint & 0x1f);
+}
+
+
+/* Wrappers for CNS calls.
+ * Any pointers must point to locations that will not take TLB misses.
+ */
+int bluegene_testInboxAttention(void);
+int bluegene_testForOutboxCompletion(void);
+int bluegene_writeRASEvent_nonBlocking(unsigned facility,
+ unsigned unit,
+ unsigned short err_code,
+ unsigned numDetails,
+ unsigned details[]);
+int bluegene_writeRASString(unsigned facility,
+ unsigned unit,
+ unsigned short err_code,
+ char* str);
+int bluegene_writeRASString_nonBlocking(unsigned facility,
+ unsigned unit,
+ unsigned short err_code,
+ char* str);
+int bluegene_writeToMailboxConsole(char *msg, unsigned msglen);
+int bluegene_writeToMailboxConsole_nonBlocking(char *msg, unsigned msglen);
+unsigned bluegene_readFromMailboxConsole(char *buf, unsigned bufsize);
+
+int bluegene_macResetPHY(void);
+int bluegene_macTestRxLink(void);
+int bluegene_macTestTxLink(void);
+
+int bluegene_takeCPU(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg));
+
+int bluegene_getPersonality(void* buff, unsigned buffSize);
+
+int bluegene_mapXEMAC(void* baseAddr);
+
+#endif /* __ASSEMBLY__ */
+#endif /* __KERNEL__ */
+#endif
diff --git a/arch/powerpc/include/asm/bluegene_ras.h b/arch/powerpc/include/asm/bluegene_ras.h
new file mode 100644
index 00000000000000..05757e1b4c9d5c
--- /dev/null
+++ b/arch/powerpc/include/asm/bluegene_ras.h
@@ -0,0 +1,107 @@
+/*
+ * Andrew Tauferner
+ *
+ * Copyright 2006, 2007 International Business Machines
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef __BLUEGENE_RAS_H__
+#define __BLUEGENE_RAS_H__
+
+
+typedef enum {
+ bg_comp_none = 0x00,
+ bg_comp_kernel = 0x01,
+ bg_comp_application = 0x02,
+ bg_comp_card = 0x03,
+ bg_comp_mc = 0x04,
+ bg_comp_mcserver = 0x05,
+ bg_comp_mmcs = 0x06,
+ bg_comp_diags = 0x07,
+
+ bg_comp_max // always last
+} bg_ras_comp;
+
+
+typedef enum {
+ bg_subcomp_none = 0x00,
+ bg_subcomp_ppc450 = 0x01,
+ bg_subcomp_fpu = 0x02,
+ bg_subcomp_snoop = 0x03,
+ bg_subcomp_dp0 = 0x04,
+ bg_subcomp_dp1 = 0x05,
+ bg_subcomp_l2 = 0x06,
+ bg_subcomp_l3 = 0x07,
+ bg_subcomp_ddr = 0x08,
+ bg_subcomp_sram = 0x09,
+ bg_subcomp_dma = 0x0a,
+ bg_subcomp_testint = 0x0b,
+ bg_subcomp_testint_dcr = 0x0c,
+ bg_subcomp_lockbox = 0x0d,
+ bg_subcomp_plb = 0x0e,
+ bg_subcomp_collective = 0x0f,
+ bg_subcomp_torus = 0x10,
+ bg_subcomp_globint = 0x11,
+ bg_subcomp_serdes = 0x12,
+ bg_subcomp_upc = 0x13,
+ bg_subcomp_dcr = 0x14,
+ bg_subcomp_bic = 0x15,
+ bg_subcomp_devbus = 0x16,
+ bg_subcomp_netbus = 0x17,
+ bg_subcomp_envmon = 0x18,
+ bg_subcomp_tomal = 0x19,
+ bg_subcomp_xemac = 0x1a,
+ bg_subcomp_phy = 0x1b,
+ bg_subcomp_bootloader = 0x1c,
+ bg_subcomp_cnk = 0x1d,
+ bg_subcomp_ciod = 0x1e,
+ bg_subcomp_svc_host = 0x1f,
+ bg_subcomp_diagnostic = 0x20,
+ bg_subcomp_application = 0x21,
+ bg_subcomp_linux = 0x22,
+ bg_subcomp_cns = 0x23,
+ bg_subcomp_e10000 = 0x24,
+
+ bg_subcomp_max // always last
+} bg_ras_subcomp;
+
+
+typedef enum {
+ bg_code_none = 0x00,
+ bg_code_halted = 0x01,
+ bg_code_script_error = 0x02,
+ bg_code_boot_complete = 0x03,
+ bg_code_panic = 0x04,
+ bg_code_oops = 0x05,
+ bg_code_tty_alloc_failure = 0x06,
+ bg_code_tty_reg_failure = 0x07,
+ bg_code_mbox_thread_create_failure = 0x08,
+ bg_code_sysrq_thread_create_failure = 0x09,
+ bg_code_oom = 0x0a,
+ bg_ras_max // always last
+} bg_ras_code;
+
+
+/*
+ * bg_ras -- RAS data structure
+ */
+#define BG_RAS_DATA_MAX 216
+typedef struct {
+ unsigned short comp;
+ unsigned short subcomp;
+ unsigned short code;
+ unsigned short length;
+ unsigned char data[BG_RAS_DATA_MAX];
+} bg_ras;
+
+
+#define BG_RAS_FILE "/proc/ras"
+#define BG_RAS_ASCII_FILE "/proc/ras_ascii"
+
+
+#endif // __BLUEGENE_RAS_H__
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index 27cc6fdcd3b79e..18693a73b0433c 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -8,6 +8,9 @@
#define PPC44x_MMUCR_TID 0x000000ff
#define PPC44x_MMUCR_STS 0x00010000
+#define PPC44x_MMUCR_SWOA 0x01000000
+#define PPC44x_MMUCR_U1TE 0x00400000
+#define PPC44x_MMUCR_U2SWOAE 0x00200000
#define PPC44x_TLB_PAGEID 0
#define PPC44x_TLB_XLAT 1
@@ -32,9 +35,11 @@
/* Storage attribute and access control fields */
#define PPC44x_TLB_ATTR_MASK 0x0000ff80
+#define PPC44x_TLB_WL1 0x00100000 /* Write-through L1 */
#define PPC44x_TLB_U0 0x00008000 /* User 0 */
#define PPC44x_TLB_U1 0x00004000 /* User 1 */
#define PPC44x_TLB_U2 0x00002000 /* User 2 */
+#define PPC44x_TLB_SWOA PPC44x_TLB_U2 /* SWOA when MMUCR U2SWOAE is enabled */
#define PPC44x_TLB_U3 0x00001000 /* User 3 */
#define PPC44x_TLB_W 0x00000800 /* Caching is write-through */
#define PPC44x_TLB_I 0x00000400 /* Caching is inhibited */
@@ -67,8 +72,13 @@ typedef struct {
#endif /* !__ASSEMBLY__ */
#ifndef CONFIG_PPC_EARLY_DEBUG_44x
+#ifndef CONFIG_BGP
#define PPC44x_EARLY_TLBS 1
#else
+/* Bluegene maps firmware with an early TLB. */
+#define PPC44x_EARLY_TLBS 2
+#endif
+#else
#define PPC44x_EARLY_TLBS 2
#define PPC44x_EARLY_DEBUG_VIRTADDR (ASM_CONST(0xf0000000) \
| (ASM_CONST(CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW) & 0xffff))
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 1458d95003814d..9256c1e5713164 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -9,7 +9,8 @@
#define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32
-#ifdef CONFIG_NOT_COHERENT_CACHE
+/* For BGP, it is convenient for 'kmalloc' to come back with 32-byte aligned units for torus DMA */
+#if defined(CONFIG_NOT_COHERENT_CACHE) || defined(CONFIG_BGP)
#define ARCH_KMALLOC_MINALIGN L1_CACHE_BYTES
#endif
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 1a0d628eb114bc..471ab83ec18982 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -84,6 +84,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \
#define STXVD2X(xs, ra, rb) .long (0x7c000798 | VSX_XX1((xs), (ra), (rb)))
#define LXVD2X(xs, ra, rb) .long (0x7c000698 | VSX_XX1((xs), (ra), (rb)))
+#define LFPDX(frt,ra,rb) .long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(462<<1)
+#define STFPDX(frt,ra,rb) .long (31<<26)|((frt)<<21)|((ra)<<16)|((rb)<<11)|(974<<1)
+
#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
@@ -93,18 +96,26 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \
#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base)
#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base)
-#define SAVE_FPR(n, base) stfd n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
-#define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base)
-#define SAVE_4FPRS(n, base) SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
-#define SAVE_8FPRS(n, base) SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
-#define SAVE_16FPRS(n, base) SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
-#define SAVE_32FPRS(n, base) SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
-#define REST_FPR(n, base) lfd n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
-#define REST_2FPRS(n, base) REST_FPR(n, base); REST_FPR(n+1, base)
-#define REST_4FPRS(n, base) REST_2FPRS(n, base); REST_2FPRS(n+2, base)
-#define REST_8FPRS(n, base) REST_4FPRS(n, base); REST_4FPRS(n+4, base)
-#define REST_16FPRS(n, base) REST_8FPRS(n, base); REST_8FPRS(n+8, base)
-#define REST_32FPRS(n, base) REST_16FPRS(n, base); REST_16FPRS(n+16, base)
+#ifndef CONFIG_BGP
+/* Normal FPR save/restore. */
+#define SAVE_FPR(n, b, base) stfd n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define REST_FPR(n, b, base) lfd n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#else
+/* Blue Gene "double-hummer" FPR save/restore. */
+#define SAVE_FPR(n,b,base) li b,THREAD_FPR0+(16*(n)); STFPDX(n,base,b)
+#define REST_FPR(n,b,base) li b,THREAD_FPR0+(16*(n)); LFPDX(n,base,b)
+#endif
+
+#define SAVE_2FPRS(n, b, base) SAVE_FPR(n, b, base); SAVE_FPR(n+1, b, base)
+#define SAVE_4FPRS(n, b, base) SAVE_2FPRS(n, b, base); SAVE_2FPRS(n+2, b, base)
+#define SAVE_8FPRS(n, b, base) SAVE_4FPRS(n, b, base); SAVE_4FPRS(n+4, b, base)
+#define SAVE_16FPRS(n, b, base) SAVE_8FPRS(n, b, base); SAVE_8FPRS(n+8, b, base)
+#define SAVE_32FPRS(n, b, base) SAVE_16FPRS(n, b, base); SAVE_16FPRS(n+16, b, base)
+#define REST_2FPRS(n, b, base) REST_FPR(n, b, base); REST_FPR(n+1, b, base)
+#define REST_4FPRS(n, b, base) REST_2FPRS(n, b, base); REST_2FPRS(n+2, b, base)
+#define REST_8FPRS(n, b, base) REST_4FPRS(n, b, base); REST_4FPRS(n+4, b, base)
+#define REST_16FPRS(n, b, base) REST_8FPRS(n, b, base); REST_8FPRS(n+8, b, base)
+#define REST_32FPRS(n, b, base) REST_16FPRS(n, b, base); REST_16FPRS(n+16, b, base)
#define SAVE_VR(n,b,base) li b,THREAD_VR0+(16*(n)); stvx n,b,base
#define SAVE_2VRS(n,b,base) SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index d3466490104a55..4dcd99a968683b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -14,8 +14,15 @@
#ifdef CONFIG_VSX
#define TS_FPRWIDTH 2
+#define TS_FPRALIGN
+#else
+#ifdef CONFIG_BGP
+#define TS_FPRWIDTH 2
+#define TS_FPRALIGN __attribute__((aligned(16)))
#else
#define TS_FPRWIDTH 1
+#define TS_FPRALIGN
+#endif
#endif
#ifndef __ASSEMBLY__
@@ -95,8 +102,12 @@ extern struct task_struct *last_task_used_spe;
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
+#if defined(CONFIG_TASK_UNMAPPED_BASE)
+#define TASK_UNMAPPED_BASE (CONFIG_TASK_UNMAPPED_BASE)
+#else
#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
#endif
+#endif
#ifdef CONFIG_PPC64
/* 64-bit user address space is 44-bits (16TB user VM) */
@@ -166,7 +177,7 @@ struct thread_struct {
unsigned long dbcr1;
#endif
/* FP and VSX 0-31 register set */
- double fpr[32][TS_FPRWIDTH];
+ double fpr[32][TS_FPRWIDTH] TS_FPRALIGN;
struct {
unsigned int pad;
@@ -309,7 +320,7 @@ static inline void prefetchw(const void *x)
#define spin_lock_prefetch(x) prefetchw(x)
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || defined(CONFIG_BGP)
#define HAVE_ARCH_PICK_MMAP_LAYOUT
#endif
diff --git a/arch/powerpc/include/bpcore/bgp_dma_memmap.h b/arch/powerpc/include/bpcore/bgp_dma_memmap.h
new file mode 100644
index 00000000000000..431bed6512cb82
--- /dev/null
+++ b/arch/powerpc/include/bpcore/bgp_dma_memmap.h
@@ -0,0 +1,205 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+
+#ifndef _BGP_DMA_MEMMAP_H_
+#define _BGP_DMA_MEMMAP_H_
+
+#define _BGP_DMA_NUM_INJ_FIFO_GROUPS 4
+#define _BGP_DMA_NUM_INJ_FIFOS_PER_GROUP 32
+#define _BGP_DMA_NUM_INJ_FIFOS (_BGP_DMA_NUM_INJ_FIFO_GROUPS * _BGP_DMA_NUM_INJ_FIFOS_PER_GROUP)
+
+#define _BGP_DMA_NUM_REC_FIFO_GROUPS 4
+#define _BGP_DMA_NUM_REC_FIFOS_PER_GROUP 9
+#define _BGP_DMA_NUM_REC_FIFOS (_BGP_DMA_NUM_REC_FIFO_GROUPS * _BGP_DMA_NUM_REC_FIFOS_PER_GROUP)
+
+/* size = end - start - BGP_FIFO_SAFETY_MARGIN */
+/* so you can distinguish between full and empty, in 16 byte units */
+#define _BGP_DMA_FIFO_SAFETY_MARGIN 1
+#define _BGP_DMA_QUADS_PER_PACKET 16
+
+#define _BGP_DMA_NUM_COUNTER_GROUPS 4
+#define _BGP_DMA_NUM_COUNTERS_PER_GROUP 64
+#define _BGP_DMA_NUM_COUNTERS (_BGP_DMA_NUM_COUNTER_GROUPS * _BGP_DMA_NUM_COUNTERS_PER_GROUP)
+
+/* these are the lower 12 bits */
+/* #define _BGP_DMA_GROUP_A(g) ((g)*0x1000) */
+
+/* ------------------------------------------------ */
+/* Macros defining absolute virtual address */
+/* ------------------------------------------------ */
+#define _BGP_VA_DMA_GROUP_A(g) (_BGP_VA_DMA + ((g)*0x1000))
+
+/* offset start of iDMA */
+#define _BGP_VA_iDMA_GROUP_START(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0 )
+
+/* repeated 32 times i=0 to 31 */
+#define _BGP_VA_iDMA_START(g,i) (_BGP_VA_DMA_GROUP_A(g) + ((i)*0x0010) )
+#define _BGP_VA_iDMA_END(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0004+(i)*0x0010) )
+#define _BGP_VA_iDMA_HEAD(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0008+(i)*0x0010) )
+#define _BGP_VA_iDMA_TAIL(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x000C+(i)*0x0010) )
+#define _BGP_VA_iDMA_NOT_EMPTY(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0200)
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0204) */
+#define _BGP_VA_iDMA_AVAILABLE(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0208)
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x020C) */
+#define _BGP_VA_iDMA_THRESHOLD_CROSSED(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0210)
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0214) */
+#define _BGP_VA_iDMA_CLEAR_THRESHOLD_CROSSED(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0218)
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x021C) */
+#define _BGP_VA_iDMA_ACTIVATED(g) (_BGP_VA_DMA_GROUP_A(g) + 0x220)
+#define _BGP_VA_iDMA_ACTIVATE(g) (_BGP_VA_DMA_GROUP_A(g) + 0x224)
+#define _BGP_VA_iDMA_DEACTIVATE(g) (_BGP_VA_DMA_GROUP_A(g) + 0x228)
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x022C) to ( _BGP_VA_DMA_GROUP_A(g)+0x02FF) */
+/* repeated twice, i=0 to 1 */
+#define _BGP_VA_iDMA_COUNTER_ENABLED(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0300 +(i)*0x0004) )
+#define _BGP_VA_iDMA_COUNTER_ENABLE(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0308 +(i)*0x0004) )
+#define _BGP_VA_iDMA_COUNTER_DISABLE(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0310 +(i)*0x0004) )
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0318) to ( _BGP_VA_DMA_GROUP_A(g)+0x031C) */
+/* repeated twice, i=0 to 1 */
+#define _BGP_VA_iDMA_COUNTER_HIT_ZERO(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0320 +(i)*0x0004) )
+#define _BGP_VA_iDMA_COUNTER_CLEAR_HIT_ZERO(g,i) (_BGP_VA_DMA_GROUP_A(g) + (0x0328 +(i)*0x0004) )
+#define _BGP_VA_iDMA_COUNTER_GRP_STATUS(g) (_BGP_VA_DMA_GROUP_A(g) + 0x0330)
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0334) to ( _BGP_VA_DMA_GROUP_A(g)+0x03FC) */
+/* repeated 64 times i=0 to 63 */
+#define _BGP_VA_iDMA_COUNTER(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0400 +(i)*0x0010) )
+#define _BGP_VA_iDMA_COUNTER_INCREMENT(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0404 +(i)*0x0010) )
+#define _BGP_VA_iDMA_COUNTER_BASE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0408 +(i)*0x0010) )
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x040C) to ( _BGP_VA_DMA_GROUP_A(g)+0x07FC) */
+
+/* offset start of rDMA */
+#define _BGP_VA_rDMA_GROUP_START(g) ( _BGP_VA_DMA_GROUP_A(g) + 0x0800 )
+
+/* repeated 8 times i=0 to 7 */
+#define _BGP_VA_rDMA_START(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0800 + (i)*0x0010) )
+#define _BGP_VA_rDMA_END(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0804 + (i)*0x0010) )
+#define _BGP_VA_rDMA_HEAD(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0808 + (i)*0x0010) )
+#define _BGP_VA_rDMA_TAIL(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x080C + (i)*0x0010) )
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0890) to ( _BGP_VA_DMA_GROUP_A(g)+0x08FC) */
+/* repeated 16 times, 0 to 15 */
+/* below addresses have storage backing them, but are not used by the DMA */
+#define _BGP_NUM_rDMA_UNUSED 16
+#define _BGP_VA_rDMA_UNUSED(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0900 + (i)*0x0004) )
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0940) to ( _BGP_VA_DMA_GROUP_A(g)+0x09FC) */
+
+#define _BGP_VA_rDMA_TAIL(g,i)                     ( _BGP_VA_DMA_GROUP_A(g) + (0x080C + (i)*0x0010) ) /* NOTE(review): duplicate of the identical definition above; benign redefinition, consider removing */
+/* repeated 2 times i=0 to 1 */
+#define _BGP_VA_rDMA_NOT_EMPTY(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0A00 + (i)*0x0004) )
+#define _BGP_VA_rDMA_AVAILABLE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0A08 + (i)*0x0004) )
+#define _BGP_VA_rDMA_THRESHOLD_CROSSED(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0A10 + (i)*0x0004) )
+#define _BGP_VA_rDMA_CLEAR_THRESHOLD_CROSSED(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0A18 + (i)*0x0004) )
+                                     /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0A1C) to ( _BGP_VA_DMA_GROUP_A(g)+0x0AFC) */
+/* repeat 2 times, i=0 to 1 */
+#define _BGP_VA_rDMA_COUNTER_ENABLED(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B00 + (i)*0x0004) )
+#define _BGP_VA_rDMA_COUNTER_ENABLE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B08 + (i)*0x0004) )
+#define _BGP_VA_rDMA_COUNTER_DISABLE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B10 + (i)*0x0004) )
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0B18) to ( _BGP_VA_DMA_GROUP_A(g)+0x0B1C) */
+/* repeat 2 times, i=0 to 1 */
+#define _BGP_VA_rDMA_COUNTER_HIT_ZERO(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B20 + (i)*0x0004) )
+#define _BGP_VA_rDMA_COUNTER_CLEAR_HIT_ZERO(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0B28 + (i)*0x0004) )
+#define _BGP_VA_rDMA_COUNTER_GRP_STATUS(g) ( _BGP_VA_DMA_GROUP_A(g) + 0x0B30)
+ /* HOLE: ( _BGP_VA_DMA_GROUP_A(g)+0x0B34) to ( _BGP_VA_DMA_GROUP_A(g)+0x0BFC) */
+/* repeat 64 times, i=0 to 63 */
+#define _BGP_VA_rDMA_COUNTER(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0C00 + (i)*0x0010) )
+#define _BGP_VA_rDMA_COUNTER_INCREMENT(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0C04 + (i)*0x0010) )
+#define _BGP_VA_rDMA_COUNTER_BASE(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0C08 + (i)*0x0010) )
+#define _BGP_VA_rDMA_COUNTER_MAX(g,i) ( _BGP_VA_DMA_GROUP_A(g) + (0x0C0C + (i)*0x0010) )
+
+
+
+/* --------------------------------------- */
+/* Macros defining address offset */
+/* --------------------------------------- */
+
+
+/* these are the lower 12 bits */
+#define _BGP_DMA_GROUP_A_OFFSET(g) ((g)*0x1000)
+
+/* ---------------------- */
+/* offset start of iDMA */
+/* ---------------------- */
+#define _BGP_iDMA_GROUP_START_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0 )
+
+/* repeated 32 times i=0 to 31 */
+#define _BGP_iDMA_START_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+(i)*0x0010)
+#define _BGP_iDMA_END_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0004+(i)*0x0010)
+#define _BGP_iDMA_HEAD_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0008+(i)*0x0010)
+#define _BGP_iDMA_TAIL_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x000C+(i)*0x0010)
+#define _BGP_iDMA_NOT_EMPTY_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0200)
+ /* HOLE ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0204) */
+#define _BGP_iDMA_AVAILABLE_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0208)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x020C) */
+#define _BGP_iDMA_THRESHOLD_CROSSED_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0210)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0214) */
+#define _BGP_iDMA_CLEAR_THRESHOLD_CROSSED_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0218)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x021C) */
+#define _BGP_iDMA_ACTIVATED_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x220)
+#define _BGP_iDMA_ACTIVATE_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x224)
+#define _BGP_iDMA_DEACTIVATE_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x228)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x022C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x02FF) */
+/* repeated twice, i=0 to 1 */
+#define _BGP_iDMA_COUNTER_ENABLED_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0300 +(i)*0x0004)
+#define _BGP_iDMA_COUNTER_ENABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0308 +(i)*0x0004)
+#define _BGP_iDMA_COUNTER_DISABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0310 +(i)*0x0004)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0318) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x031C) */
+/* repeated twice, i=0 to 1 */
+#define _BGP_iDMA_COUNTER_HIT_ZERO_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0320 +(i)*0x0004)
+#define _BGP_iDMA_COUNTER_CLEAR_HIT_ZERO_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0328 +(i)*0x0004)
+#define _BGP_iDMA_COUNTER_GRP_STATUS_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0330)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0334) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x03FC) */
+/* repeated 64 times i=0 to 63 */
+#define _BGP_iDMA_COUNTER_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0400 +(i)*0x0010)
+#define _BGP_iDMA_COUNTER_INCREMENT_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0404 +(i)*0x0010)
+#define _BGP_iDMA_COUNTER_BASE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0408 +(i)*0x0010)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x040C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x07FC) */
+
+
+/* ----------------------- */
+/* offset start of rDMA */
+/* ----------------------- */
+#define _BGP_rDMA_GROUP_START_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0800 )
+
+/* repeated 8 times i=0 to 7 */
+#define _BGP_rDMA_START_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0800 + (i)*0x0010)
+#define _BGP_rDMA_END_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0804 + (i)*0x0010)
+#define _BGP_rDMA_HEAD_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x0808 + (i)*0x0010)
+#define _BGP_rDMA_TAIL_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+ 0x080C + (i)*0x0010)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0890) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x09FC) */
+/* repeated 2 times i=0 to 1 */
+#define _BGP_rDMA_NOT_EMPTY_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A00 + (i)*0x0004)
+#define _BGP_rDMA_AVAILABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A08 + (i)*0x0004)
+#define _BGP_rDMA_THRESHOLD_CROSSED_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A10 + (i)*0x0004)
+#define _BGP_rDMA_CLEAR_THRESHOLD_CROSSED_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A18 + (i)*0x0004)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0A1C) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0AFC) */
+/* repeat 2 times, i=0 to 1 */
+#define _BGP_rDMA_COUNTER_ENABLED_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B00 + (i)*0x0004)
+#define _BGP_rDMA_COUNTER_ENABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B08 + (i)*0x0004)
+#define _BGP_rDMA_COUNTER_DISABLE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B10 + (i)*0x0004)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B18) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B1C) */
+/* repeat 2 times, i=0 to 1 */
+#define _BGP_rDMA_COUNTER_HIT_ZERO_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B20 + (i)*0x0004)
+#define _BGP_rDMA_COUNTER_CLEAR_HIT_ZERO_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B28 + (i)*0x0004)
+#define _BGP_rDMA_COUNTER_GRP_STATUS_OFFSET(g) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B30)
+ /* HOLE: ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0B34) to ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0BFC) */
+/* repeat 64 times, i=0 to 63 */
+#define _BGP_rDMA_COUNTER_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C00 + (i)*0x0010)
+#define _BGP_rDMA_COUNTER_INCREMENT_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C04 + (i)*0x0010)
+#define _BGP_rDMA_COUNTER_BASE_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C08 + (i)*0x0010)
+#define _BGP_rDMA_COUNTER_MAX_OFFSET(g,i) ( _BGP_DMA_GROUP_A_OFFSET(g)+0x0C0C + (i)*0x0010)
+
+#endif
diff --git a/arch/powerpc/include/bpcore/bgp_types.h b/arch/powerpc/include/bpcore/bgp_types.h
new file mode 100644
index 00000000000000..e298fa764e4987
--- /dev/null
+++ b/arch/powerpc/include/bpcore/bgp_types.h
@@ -0,0 +1,71 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file bpcore/bgp_types.h
+ */
+
+#ifndef _BGP_TYPES_H_ /* Prevent multiple inclusion. */
+#define _BGP_TYPES_H_
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+
+#if !defined(__ASSEMBLY__) && !defined(__BGP_HIDE_STANDARD_TYPES__)
+
+#include <common/alignment.h>
+
+#ifdef _AIX
+#include <inttypes.h>
+#elif ! defined(__LINUX_KERNEL__)
+#include <stdint.h>
+#include <sys/types.h>
+#else
+#include <linux/types.h>
+#endif
+
+
+typedef int8_t _bgp_i8_t;
+typedef uint8_t _bgp_u8_t;
+typedef int16_t _bgp_i16_t;
+typedef uint16_t _bgp_u16_t;
+typedef int32_t _bgp_i32_t;
+typedef uint32_t _bgp_u32_t;
+typedef int64_t _bgp_i64_t;
+typedef uint64_t _bgp_u64_t;
+
+typedef union T_BGP_QuadWord
+ {
+ uint8_t ub[ 16];
+ uint16_t us[ 8];
+ uint32_t ul[ 4];
+ uint64_t ull[ 2];
+ float f[ 4];
+ double d[ 2];
+ }
+ ALIGN_QUADWORD _bgp_QuadWord_t;
+
+typedef _bgp_QuadWord_t _QuadWord_t;
+
+#endif /* !__ASSEMBLY__ && !__BGP_HIDE_STANDARD_TYPES__ */
+
+__END_DECLS
+
+#endif /* Add nothing below this line. */
diff --git a/arch/powerpc/include/bpcore/ic_memmap.h b/arch/powerpc/include/bpcore/ic_memmap.h
new file mode 100644
index 00000000000000..5ff376f4b622f0
--- /dev/null
+++ b/arch/powerpc/include/bpcore/ic_memmap.h
@@ -0,0 +1,803 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file bpcore/ic_memmap.h
+ */
+
+
+
+/**
+ * BGP Interrupt Controller Register mapping and bit definition.
+ *
+ * Note: preliminary register assignment.
+ */
+
+
+/* ************************************************************************* */
+/* Architected BGP Interrupt Controller Registers */
+/* ************************************************************************* */
+/* Authors: Jose R. Brunheroto, Martin Ohmacht */
+/* Reflects the contents of the document http://w3vlsi.watson.ibm.com// */
+/* */
+/* ************************************************************************* */
+
+
+
+/*
+
+ BIC CRIT hierarchy register
+ +------------------------------------+
+ |0 1 2 3 4 5 6 7 8 9 ... 31 |
+ +------------------------------------+
+ | | | | | | | | | |
+ | | | | | | | BIC UNIT 6
+ | | | | | | | +-----+
+ | | | | | | +-------------------------------- |0-31 | -
+ | | | | | | +-----+
+ | | | | | | BIC UNIT 5
+ | | | | | | +-----+
+ | | | | | +---------------------------------- |0-31 | -.
+ | | | | | +-----+
+ | | | | | BIC GROUP 4
+ | | | | | +-----+
+ | | | | +------------------------------------ |0-31 | -
+ | | | | +-----+
+ | | | | BIC GROUP3
+ | | | | +-----+
+ | | | +-------------------------------------- |0-31 | -
+ | | | +-----+
+ | | | BIC GROUP 2
+ | | | +-----+
+ | | +---------------------------------------- |0-31 | -
+ | | +-----+
+ | | BIC GROUP 1
+ | | +-----+
+ | +------------------------------------------ |0-31 | -
+ | +-----+
+ | BIC GROUP 0
+ | +-----+
+ +-------------------------------------------- |0-31 | -
+ +-----+
+
+
+ BIC NCRIT hierarchy register
+ +------------------------------------+
+ |0 1 2 3 4 5 6 7 8 9 ... 31 |
+ +------------------------------------+
+ | | | | | | | | | |
+ | | | | | | | BIC UNIT 6
+ | | | | | | | +-----+
+ | | | | | | +-------------------------------- |0-31 | -
+ | | | | | | +-----+
+ | | | | | | BIC UNIT 5
+ | | | | | | +-----+
+ | | | | | +---------------------------------- |0-31 | -.
+ | | | | | +-----+
+ | | | | | BIC GROUP 4
+ | | | | | +-----+
+ | | | | +------------------------------------ |0-31 | -
+ | | | | +-----+
+ | | | | BIC GROUP3
+ | | | | +-----+
+ | | | +-------------------------------------- |0-31 | -
+ | | | +-----+
+ | | | BIC GROUP 2
+ | | | +-----+
+ | | +---------------------------------------- |0-31 | -
+ | | +-----+
+ | | BIC GROUP 1
+ | | +-----+
+ | +------------------------------------------ |0-31 | -
+ | +-----+
+ | BIC GROUP 0
+ | +-----+
+ +-------------------------------------------- |0-31 | -
+ +-----+
+
+
+ BIC MCCU hierarchy register
+ +------------------------------------+
+ |0 1 2 3 4 5 6 7 8 9 ... 31 |
+ +------------------------------------+
+ | | | | | | | | | |
+ | | | | | | | BIC UNIT 6
+ | | | | | | | +-----+
+ | | | | | | +-------------------------------- |0-31 | -
+ | | | | | | +-----+
+ | | | | | | BIC UNIT 5
+ | | | | | | +-----+
+ | | | | | +---------------------------------- |0-31 | -.
+ | | | | | +-----+
+ | | | | | BIC GROUP 4
+ | | | | | +-----+
+ | | | | +------------------------------------ |0-31 | -
+ | | | | +-----+
+ | | | | BIC GROUP3
+ | | | | +-----+
+ | | | +-------------------------------------- |0-31 | -
+ | | | +-----+
+ | | | BIC GROUP 2
+ | | | +-----+
+ | | +---------------------------------------- |0-31 | -
+ | | +-----+
+ | | BIC GROUP 1
+ | | +-----+
+ | +------------------------------------------ |0-31 | -
+ | +-----+
+ | BIC GROUP 0
+ | +-----+
+ +-------------------------------------------- |0-31 | -
+ +-----+
+
+*/
+
+
+#ifndef _IC_MEMMAP_H_ /* Prevent multiple inclusion */
+#define _IC_MEMMAP_H_
+
+
+
+#define _BGP_IC_NUMBER_OF_GROUPS (10) /* number of groups (0..9 inclusive) */
+
+
+
+#define _BGP_IC_TARGET_DISABLED 0x00 /* disabled */
+#define _BGP_IC_TARGET_NCRIT_BCAST 0x01 /* non-critical broadcast */
+#define _BGP_IC_TARGET_CRIT_BCAST 0x02 /* critical broadcast */
+#define _BGP_IC_TARGET_MCHK_BCAST 0x03 /* machine check */
+
+#define _BGP_IC_TARGET_NCRIT_CORE0 0x04 /* non-critical core 0 */
+#define _BGP_IC_TARGET_NCRIT_CORE1 0x05 /* non-critical core 1 */
+#define _BGP_IC_TARGET_NCRIT_CORE2 0x06 /* non-critical core 2 */
+#define _BGP_IC_TARGET_NCRIT_CORE3 0x07 /* non-critical core 3 */
+
+#define _BGP_IC_TARGET_CRIT_CORE0 0x08 /* critical core 0 */
+#define _BGP_IC_TARGET_CRIT_CORE1 0x09 /* critical core 1 */
+#define _BGP_IC_TARGET_CRIT_CORE2 0x0A /* critical core 2 */
+#define _BGP_IC_TARGET_CRIT_CORE3 0x0B /* critical core 3 */
+
+#define _BGP_IC_TARGET_MCHK_CORE0 0x0C /* machine check core 0 */
+#define _BGP_IC_TARGET_MCHK_CORE1 0x0D /* machine check core 1 */
+#define _BGP_IC_TARGET_MCHK_CORE2 0x0E /* machine check core 2 */
+#define _BGP_IC_TARGET_MCHK_CORE3 0x0F /* machine check core 3 */
+
+
+typedef struct _BGP_IC_Group_t
+{
+ volatile unsigned int status; /* status (read and write) */
+ volatile unsigned int rd_clr_status; /* status (read and clear) */
+ volatile unsigned int status_clr; /* status (write and clear) */
+ volatile unsigned int status_set; /* status (write and set) */
+
+ volatile unsigned int target_irq0_7; /* target selector (IRQ 0:7) */
+ volatile unsigned int target_irq8_15; /* target selector (IRQ 8:15) */
+ volatile unsigned int target_irq16_23; /* target selector (IRQ 16:23) */
+ volatile unsigned int target_irq24_31; /* target selector (IRQ 24:31) */
+
+ union {
+ volatile unsigned int ncrit_masked_irq[ 4 ]; /* array for easier access */
+ struct {
+ volatile unsigned int ncrit_0_masked_irq; /* non-critical core 0 masked irq (RO) */
+ volatile unsigned int ncrit_1_masked_irq; /* non-critical core 1 masked irq */
+ volatile unsigned int ncrit_2_masked_irq; /* non-critical core 2 masked irq */
+ volatile unsigned int ncrit_3_masked_irq; /* non-critical core 3 masked irq */
+ };
+ };
+
+ union {
+ volatile unsigned int crit_masked_irq[ 4 ]; /* array for easier access */
+ struct {
+ volatile unsigned int crit_0_masked_irq; /* critical core 0 masked irq (RO) */
+ volatile unsigned int crit_1_masked_irq; /* critical core 1 masked irq */
+ volatile unsigned int crit_2_masked_irq; /* critical core 2 masked irq */
+ volatile unsigned int crit_3_masked_irq; /* critical core 3 masked irq */
+ };
+ };
+
+ union {
+ volatile unsigned int mchk_masked_irq[ 4 ]; /* array for easier access */
+ struct {
+ volatile unsigned int mchk_0_masked_irq; /* machine check core 0 masked irq (RO) */
+ volatile unsigned int mchk_1_masked_irq; /* machine check core 1 masked irq */
+ volatile unsigned int mchk_2_masked_irq; /* machine check core 2 masked irq */
+ volatile unsigned int mchk_3_masked_irq; /* machine check core 3 masked irq */
+ };
+ };
+
+ volatile unsigned int ti_mchk_mask; /* (RW) TestInt MachineCheck Mask */
+ volatile unsigned int upc_time_stamp_mask; /* (RW) UPC Time Stamp Mask */
+ volatile unsigned int clock_sync_stop_mask; /* (RW) Clock Sync-Stop Mask */
+
+ volatile unsigned int ti_mchk_wof; /* (RW) TestInt Mchk Who's on First */
+ volatile unsigned int upc_time_stamp_wof; /* (RW) UPC Time Stamp Who's on First */
+ volatile unsigned int clock_sync_stop_wof; /* (RW) Clock Sync-Stop Who's on First */
+
+ volatile unsigned int ti_mchk; /* (RO) TestInt Mchk */
+ volatile unsigned int upc_time_stamp; /* (RO) UPC Time Stamp */
+ volatile unsigned int clock_sync_stop; /* (RO) Clock Sync-Stop */
+
+
+} _BGP_IC_Group_t;
+
+
+
+#define _BGP_IC_MEM_GROUP_SIZE (0x80) /* group size in bytes */
+
+/* macros for indexed access to groups */
+#define _BGP_IC_MEM_GROUP_OFFSET(_grp) ( _BGP_IC_MEM_GROUP0_OFFSET + (_grp)*_BGP_IC_MEM_GROUP_SIZE )
+
+
+/* Defines BGP Interrupt Controller Register Offset (memory mapped access) */
+#define _BGP_IC_MEM_GROUP0_OFFSET (0x0000) /* Group 0 offset */
+#define _BGP_IC_MEM_GROUP1_OFFSET (0x0080) /* Group 1 offset */
+#define _BGP_IC_MEM_GROUP2_OFFSET (0x0100) /* Group 2 offset */
+#define _BGP_IC_MEM_GROUP3_OFFSET (0x0180) /* Group 3 offset */
+#define _BGP_IC_MEM_GROUP4_OFFSET (0x0200) /* Group 4 offset */
+#define _BGP_IC_MEM_GROUP5_OFFSET (0x0280) /* Group 5 offset */
+#define _BGP_IC_MEM_GROUP6_OFFSET (0x0300) /* Group 6 offset */
+#define _BGP_IC_MEM_GROUP7_OFFSET (0x0380) /* Group 7 offset */
+#define _BGP_IC_MEM_GROUP8_OFFSET (0x0400) /* Group 8 offset */
+#define _BGP_IC_MEM_GROUP9_OFFSET (0x0480) /* Group 9 offset */
+
+/* reserved group offset */
+#define _BGP_IC_MEM_GROUP10_OFFSET (0x0500) /* Group 10 offset */
+#define _BGP_IC_MEM_GROUP11_OFFSET (0x0580) /* Group 11 offset */
+#define _BGP_IC_MEM_GROUP12_OFFSET (0x0600) /* Group 12 offset */
+#define _BGP_IC_MEM_GROUP13_OFFSET (0x0680) /* Group 13 offset */
+#define _BGP_IC_MEM_GROUP14_OFFSET (0x0700) /* Group 14 offset */
+
+
+
+
+/* Hierarchy Registers offsets */
+#define _BGP_IC_MEM_HNCR_OFFSET (0x0780) /* Hierarchy Non-Critical Register */
+#define _BGP_IC_MEM_HNCR0_OFFSET (0x0780) /* Hierarchy Non-Critical Register (core 0) */
+#define _BGP_IC_MEM_HNCR1_OFFSET (0x0784) /* Hierarchy Non-Critical Register (core 1) */
+#define _BGP_IC_MEM_HNCR2_OFFSET (0x0788) /* Hierarchy Non-Critical Register (core 2) */
+#define _BGP_IC_MEM_HNCR3_OFFSET (0x078C) /* Hierarchy Non-Critical Register (core 3) */
+
+
+#define _BGP_IC_MEM_HCR_OFFSET (0x0790) /* Hierarchy Critical Register */
+#define _BGP_IC_MEM_HCR0_OFFSET (0x0790) /* Hierarchy Critical Register (core 0) */
+#define _BGP_IC_MEM_HCR1_OFFSET (0x0794) /* Hierarchy Critical Register (core 1) */
+#define _BGP_IC_MEM_HCR2_OFFSET (0x0798) /* Hierarchy Critical Register (core 2) */
+#define _BGP_IC_MEM_HCR3_OFFSET (0x079C) /* Hierarchy Critical Register (core 3) */
+
+
+#define _BGP_IC_MEM_HMCHKR_OFFSET (0x07A0) /* Hierarchy Machine Check Register */
+#define _BGP_IC_MEM_HMCHKR0_OFFSET (0x07A0) /* Hierarchy Machine Check Register (core 0) */
+#define _BGP_IC_MEM_HMCHKR1_OFFSET (0x07A4) /* Hierarchy Machine Check Register (core 1) */
+#define _BGP_IC_MEM_HMCHKR2_OFFSET (0x07A8) /* Hierarchy Machine Check Register (core 2) */
+#define _BGP_IC_MEM_HMCHKR3_OFFSET (0x07AC) /* Hierarchy Machine Check Register (core 3) */
+
+
+#define _BGP_IC_MEM_HR_TI_MCHECK_OFFSET (0x07B0) /* hierarchy register ti_m_check (RO) */
+#define _BGP_IC_MEM_HR_UPC_TIMESTAMP_OFFSET (0x07B4) /* hierarchy register upc_timestamp_event (RO) */
+#define _BGP_IC_MEM_HR_CI_SYNC_STOP_OFFSET (0x07B8) /* hierarchy register ci_sync_stop (RO) */
+
+
+#define _BGP_IC_MEM_ERR_RW_OFFSET (0x07C0) /* IC Error Register (RW) */
+#define _BGP_IC_MEM_ERR_RDCLR_OFFSET (0x07C4) /* IC Error Register (RO) (Read Clear all bits) */
+#define _BGP_IC_MEM_ERR_ADDR_OFFSET (0x07C8) /* IC Error Address Register (RO) */
+#define _BGP_IC_MEM_ERR_DATA_OFFSET (0x07CC) /* IC Error Data Register (RO) */
+
+
+#define _BGP_IC_MEM_HR_TI_MCHECK_WOF_OFFSET (0x07D0) /* hierarchy register ti_m_check_WOF (RW) */
+#define _BGP_IC_MEM_HR_UPC_TIMESTAMP_WOF_OFFSET (0x07D4) /* hierarchy register upc_timestamp_event_WOF (RW) */
+#define _BGP_IC_MEM_HR_CI_SYNC_STOP_WOF_OFFSET (0x07D8) /* hierarchy register ci_sync_stop_WOF (RW) */
+
+
+
+/* ************************************************************************* */
+/* definitions for each interrupt generating device */
+/* ************************************************************************* */
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 00:31 */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_HIER_POS 0
+#define _BGP_IC_C2C_UNIT_NUM 0
+#define _BGP_IC_C2C_UNIT_POS 0
+#define _BGP_IC_C2C_UNIT_SIZE 32
+#define _BGP_IC_C2C_UNIT_MASK 0xffffffff
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 00:07 (Core 0) */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_C0_HIER_POS 0
+#define _BGP_IC_C2C_C0_UNIT_NUM 0
+#define _BGP_IC_C2C_C0_UNIT_POS 0
+#define _BGP_IC_C2C_C0_UNIT_SIZE 8
+#define _BGP_IC_C2C_C0_UNIT_MASK 0xff000000
+
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 08:15 (Core 1) */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_C1_HIER_POS 0
+#define _BGP_IC_C2C_C1_UNIT_NUM 0
+#define _BGP_IC_C2C_C1_UNIT_POS 8
+#define _BGP_IC_C2C_C1_UNIT_SIZE 8
+#define _BGP_IC_C2C_C1_UNIT_MASK 0x00ff0000
+
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 16:23 (Core 2) */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_C2_HIER_POS 0
+#define _BGP_IC_C2C_C2_UNIT_NUM 0
+#define _BGP_IC_C2C_C2_UNIT_POS 16
+#define _BGP_IC_C2C_C2_UNIT_SIZE 8
+#define _BGP_IC_C2C_C2_UNIT_MASK 0x0000ff00
+
+
+
+/* ************************************************************************* */
+/* Core-to-Core Software interrupts: Group 0 bits 24:31 (Core 3) */
+/* ************************************************************************* */
+
+#define _BGP_IC_C2C_C3_HIER_POS 0
+#define _BGP_IC_C2C_C3_UNIT_NUM 0
+#define _BGP_IC_C2C_C3_UNIT_POS 24
+#define _BGP_IC_C2C_C3_UNIT_SIZE 8
+#define _BGP_IC_C2C_C3_UNIT_MASK 0x000000ff
+
+
+
+
+
+/* ************************************************************************* */
+/* DMA Fatal Interrupt Request: Group 1 bits 00:31 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DMA_FT_HIER_POS 1
+#define _BGP_IC_DMA_FT_UNIT_NUM 1
+#define _BGP_IC_DMA_FT_UNIT_POS 0
+#define _BGP_IC_DMA_FT_UNIT_SIZE 32
+#define _BGP_IC_DMA_FT_UNIT_MASK 0xffffffff
+
+/* ************************************************************************* */
+/* DMA Non-Fatal Interrupt Request: Group 2 bits 00:31 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DMA_NFT_G2_HIER_POS 2
+#define _BGP_IC_DMA_NFT_G2_UNIT_NUM 2
+#define _BGP_IC_DMA_NFT_G2_UNIT_POS 0
+#define _BGP_IC_DMA_NFT_G2_UNIT_SIZE 32
+#define _BGP_IC_DMA_NFT_G2_UNIT_MASK 0xffffffff
+
+/* ************************************************************************* */
+/* DMA Non-Fatal Interrupt Request: Group 3 bits 00:31 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DMA_NFT_G3_HIER_POS 3
+#define _BGP_IC_DMA_NFT_G3_UNIT_NUM 3
+#define _BGP_IC_DMA_NFT_G3_UNIT_POS 0
+#define _BGP_IC_DMA_NFT_G3_UNIT_SIZE 32
+#define _BGP_IC_DMA_NFT_G3_UNIT_MASK 0xffffffff
+
+
+/* ************************************************************************* */
+/* DP0 PU0 Interrupt Request: Group 4 bits 00:02 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DP0_PU0_HIER_POS 4
+#define _BGP_IC_DP0_PU0_UNIT_NUM 4
+#define _BGP_IC_DP0_PU0_UNIT_POS 0
+#define _BGP_IC_DP0_PU0_UNIT_SIZE 3
+#define _BGP_IC_DP0_PU0_UNIT_MASK 0xE0000000
+
+/* ************************************************************************* */
+/* DP0 PU1 Interrupt Request: Group 4 bits 03:05 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DP0_PU1_HIER_POS 4
+#define _BGP_IC_DP0_PU1_UNIT_NUM 4
+#define _BGP_IC_DP0_PU1_UNIT_POS 3
+#define _BGP_IC_DP0_PU1_UNIT_SIZE 3
+#define _BGP_IC_DP0_PU1_UNIT_MASK 0x1C000000
+
+/* ************************************************************************* */
+/* DP1 PU0 Interrupt Request: Group 4 bits 06:08 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DP1_PU0_HIER_POS 4
+#define _BGP_IC_DP1_PU0_UNIT_NUM 4
+#define _BGP_IC_DP1_PU0_UNIT_POS 6
+#define _BGP_IC_DP1_PU0_UNIT_SIZE 3
+#define _BGP_IC_DP1_PU0_UNIT_MASK 0x03800000
+
+/* ************************************************************************* */
+/* DP1 PU1 Interrupt Request: Group 4 bits 09:11 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DP1_PU1_HIER_POS 4
+#define _BGP_IC_DP1_PU1_UNIT_NUM 4
+#define _BGP_IC_DP1_PU1_UNIT_POS 9
+#define _BGP_IC_DP1_PU1_UNIT_SIZE 3
+#define _BGP_IC_DP1_PU1_UNIT_MASK 0x00700000
+
+
+/* ************************************************************************* */
+/* Global Interrupt: Group 4 bits 12:21 */
+/* ************************************************************************* */
+
+#define _BGP_IC_GINT_HIER_POS 4
+#define _BGP_IC_GINT_UNIT_NUM 4
+#define _BGP_IC_GINT_UNIT_POS 12
+#define _BGP_IC_GINT_UNIT_SIZE 10
+#define _BGP_IC_GINT_UNIT_MASK 0x000FFC00
+
+
+/* ************************************************************************* */
+/* SRAM Interrupt Request: Group 4 bits 22:24 */
+/* ************************************************************************* */
+
+#define _BGP_IC_SRAM_HIER_POS 4
+#define _BGP_IC_SRAM_UNIT_NUM 4
+#define _BGP_IC_SRAM_UNIT_POS 22
+#define _BGP_IC_SRAM_UNIT_SIZE 3
+#define _BGP_IC_SRAM_UNIT_MASK 0x00000380
+
+
+/* ************************************************************************* */
+/* TI Global Attention Interrupt request: Group 4 bit 25 */
+/* ************************************************************************* */
+
+#define _BGP_IC_GLOB_ATT_HIER_POS 4
+#define _BGP_IC_GLOB_ATT_UNIT_NUM 4
+#define _BGP_IC_GLOB_ATT_UNIT_POS 25
+#define _BGP_IC_GLOB_ATT_UNIT_SIZE 1
+#define _BGP_IC_GLOB_ATT_UNIT_MASK 0x00000040
+
+
+/* ************************************************************************* */
+/* TI LB Scan Attention Interrupt request: Group 4 bit 26 */
+/* ************************************************************************* */
+
+#define _BGP_IC_LB_SCATTN_HIER_POS 4
+#define _BGP_IC_LB_SCATTN_UNIT_NUM 4
+#define _BGP_IC_LB_SCATTN_UNIT_POS 26
+#define _BGP_IC_LB_SCATTN_UNIT_SIZE 1
+#define _BGP_IC_LB_SCATTN_UNIT_MASK 0x00000020
+
+
+/* ************************************************************************* */
+/* TI AB Scan Attention Interrupt request: Group 4 bit 27 */
+/* ************************************************************************* */
+
+#define _BGP_IC_AB_SCATTN_HIER_POS 4
+#define _BGP_IC_AB_SCATTN_UNIT_NUM 4
+#define _BGP_IC_AB_SCATTN_UNIT_POS 27
+#define _BGP_IC_AB_SCATTN_UNIT_SIZE 1
+#define _BGP_IC_AB_SCATTN_UNIT_MASK 0x00000010
+
+
+/* ************************************************************************* */
+/* TI HB Scan Attention Interrupt request: Group 4 bit 28 */
+/* ************************************************************************* */
+
+#define _BGP_IC_HB_SCATTN_HIER_POS 4
+#define _BGP_IC_HB_SCATTN_UNIT_NUM 4
+#define _BGP_IC_HB_SCATTN_UNIT_POS 28
+#define _BGP_IC_HB_SCATTN_UNIT_SIZE 1
+#define _BGP_IC_HB_SCATTN_UNIT_MASK 0x00000008
+
+
+/* ************************************************************************* */
+/* TI DCR Read Timeout Interrupt request: Group 4 bit 29 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DCR_RD_TO_HIER_POS 4
+#define _BGP_IC_DCR_RD_TO_UNIT_NUM 4
+#define _BGP_IC_DCR_RD_TO_UNIT_POS 29
+#define _BGP_IC_DCR_RD_TO_UNIT_SIZE 1
+#define _BGP_IC_DCR_RD_TO_UNIT_MASK 0x00000004
+
+
+/* ************************************************************************* */
+/* TI DCR Write Timeout Interrupt request: Group 4 bit 30 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DCR_WR_TO_HIER_POS 4
+#define _BGP_IC_DCR_WR_TO_UNIT_NUM 4
+#define _BGP_IC_DCR_WR_TO_UNIT_POS 30
+#define _BGP_IC_DCR_WR_TO_UNIT_SIZE 1
+#define _BGP_IC_DCR_WR_TO_UNIT_MASK 0x00000002
+
+
+
+/* ************************************************************************* */
+/* Collective Non-Critical interrupt: Group 5 bits 00:19 */
+/* ************************************************************************* */
+
+#define _BGP_IC_COLNCRIT_HIER_POS 5
+#define _BGP_IC_COLNCRIT_UNIT_NUM 5
+#define _BGP_IC_COLNCRIT_UNIT_POS 0
+#define _BGP_IC_COLNCRIT_UNIT_SIZE 20
+#define _BGP_IC_COLNCRIT_UNIT_MASK 0xFFFFF000
+
+/* ************************************************************************* */
+/* Collective Critical interrupt: Group 5 bits 20:23 */
+/* ************************************************************************* */
+
+#define _BGP_IC_COLCRIT_HIER_POS 5
+#define _BGP_IC_COLCRIT_UNIT_NUM 5
+#define _BGP_IC_COLCRIT_UNIT_POS 20
+#define _BGP_IC_COLCRIT_UNIT_SIZE 4
+#define _BGP_IC_COLCRIT_UNIT_MASK 0x00000f00
+
+
+/* ************************************************************************* */
+/* SerDes machine check: Group 6 bits 0:23                                   */
+/* ************************************************************************* */
+
+#define _BGP_IC_SERDES_MCK_HIER_POS 6
+#define _BGP_IC_SERDES_MCK_UNIT_NUM 6
+#define _BGP_IC_SERDES_MCK_UNIT_POS 0
+#define _BGP_IC_SERDES_MCK_UNIT_SIZE 24
+#define _BGP_IC_SERDES_MCK_UNIT_MASK 0xFFFFFF00
+
+
+/* ************************************************************************* */
+/* UPC interrupt request: Group 6 bit 24 */
+/* ************************************************************************* */
+
+#define _BGP_IC_UPC_HIER_POS 6
+#define _BGP_IC_UPC_UNIT_NUM 6
+#define _BGP_IC_UPC_UNIT_POS 24
+#define _BGP_IC_UPC_UNIT_SIZE 1
+#define _BGP_IC_UPC_UNIT_MASK 0x00000080
+
+
+/* ************************************************************************* */
+/* UPC Error interrupt request: Group 6 bit 25 */
+/* ************************************************************************* */
+
+#define _BGP_IC_UPCERR_HIER_POS 6
+#define _BGP_IC_UPCERR_UNIT_NUM 6
+#define _BGP_IC_UPCERR_UNIT_POS 25
+#define _BGP_IC_UPCERR_UNIT_SIZE 1
+#define _BGP_IC_UPCERR_UNIT_MASK 0x00000040
+
+/* ************************************************************************* */
+/* DCR Bus interrupt request: Group 6 bit 26 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DCRBUS_HIER_POS 6
+#define _BGP_IC_DCRBUS_UNIT_NUM 6
+#define _BGP_IC_DCRBUS_UNIT_POS 26
+#define _BGP_IC_DCRBUS_UNIT_SIZE 1
+#define _BGP_IC_DCRBUS_UNIT_MASK 0x00000020
+
+/* ************************************************************************* */
+/* BIC machine check: Group 6 bit 27 */
+/* ************************************************************************* */
+
+#define _BGP_IC_BIC_MCHK_HIER_POS 6
+#define _BGP_IC_BIC_MCHK_UNIT_NUM 6
+#define _BGP_IC_BIC_MCHK_UNIT_POS 27
+#define _BGP_IC_BIC_MCHK_UNIT_SIZE 1
+#define _BGP_IC_BIC_MCHK_UNIT_MASK 0x00000010
+
+/* ************************************************************************* */
+/* BIC interrupt request: Group 6 bit 28 */
+/* ************************************************************************* */
+
+#define _BGP_IC_BIC_IRQ_HIER_POS 6
+#define _BGP_IC_BIC_IRQ_UNIT_NUM 6
+#define _BGP_IC_BIC_IRQ_UNIT_POS 28
+#define _BGP_IC_BIC_IRQ_UNIT_SIZE 1
+#define _BGP_IC_BIC_IRQ_UNIT_MASK 0x00000008
+
+/* ************************************************************************* */
+/* DEVBUS interrupt request: Group 6 bit 29 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DEVBUS_IRQ_HIER_POS 6
+#define _BGP_IC_DEVBUS_IRQ_UNIT_NUM 6
+#define _BGP_IC_DEVBUS_IRQ_UNIT_POS 29
+#define _BGP_IC_DEVBUS_IRQ_UNIT_SIZE 1
+#define _BGP_IC_DEVBUS_IRQ_UNIT_MASK 0x00000004
+
+/* ************************************************************************* */
+/* Clockstop Stopped interrupt request: Group 6 bit 30 */
+/* ************************************************************************* */
+
+#define _BGP_IC_CLK_STOP_HIER_POS 6
+#define _BGP_IC_CLK_STOP_UNIT_NUM 6
+#define _BGP_IC_CLK_STOP_UNIT_POS 30
+#define _BGP_IC_CLK_STOP_UNIT_SIZE 1
+#define _BGP_IC_CLK_STOP_UNIT_MASK 0x00000002
+
+/* ************************************************************************* */
+/* Environment Monitor interrupt request: Group 6 bit 31 */
+/* ************************************************************************* */
+
+#define _BGP_IC_ENV_MON_HIER_POS 6
+#define _BGP_IC_ENV_MON_UNIT_NUM 6
+#define _BGP_IC_ENV_MON_UNIT_POS 31
+#define _BGP_IC_ENV_MON_UNIT_SIZE 1
+#define _BGP_IC_ENV_MON_UNIT_MASK 0x00000001
+
+
+/* ************************************************************************* */
+/* L30 machine check: Group 7 bits 0:10 */
+/* ************************************************************************* */
+
+#define _BGP_IC_L30_MCHK_HIER_POS 7
+#define _BGP_IC_L30_MCHK_UNIT_NUM 7
+#define _BGP_IC_L30_MCHK_UNIT_POS 0
+#define _BGP_IC_L30_MCHK_UNIT_SIZE 11
+#define _BGP_IC_L30_MCHK_UNIT_MASK 0xFFE00000
+
+/* ************************************************************************* */
+/* L30 interrupt request: Group 7 bit 11                                     */
+/* ************************************************************************* */
+
+#define _BGP_IC_L30_IRQ_HIER_POS 7
+#define _BGP_IC_L30_IRQ_UNIT_NUM 7
+#define _BGP_IC_L30_IRQ_UNIT_POS 11
+#define _BGP_IC_L30_IRQ_UNIT_SIZE 1
+#define _BGP_IC_L30_IRQ_UNIT_MASK 0x00100000
+
+/* ************************************************************************* */
+/* L31 machine check: Group 7 bits 12:22 */
+/* ************************************************************************* */
+
+#define _BGP_IC_L31_MCHK_HIER_POS 7
+#define _BGP_IC_L31_MCHK_UNIT_NUM 7
+#define _BGP_IC_L31_MCHK_UNIT_POS 12
+#define _BGP_IC_L31_MCHK_UNIT_SIZE 11
+#define _BGP_IC_L31_MCHK_UNIT_MASK 0x000FFE00
+
+/* ************************************************************************* */
+/* L31 interrupt request: Group 7 bit 23                                     */
+/* ************************************************************************* */
+
+#define _BGP_IC_L31_IRQ_HIER_POS 7
+#define _BGP_IC_L31_IRQ_UNIT_NUM 7
+#define _BGP_IC_L31_IRQ_UNIT_POS 23
+#define _BGP_IC_L31_IRQ_UNIT_SIZE 1
+#define _BGP_IC_L31_IRQ_UNIT_MASK 0x00000100
+
+
+/* ************************************************************************* */
+/* DDR 0 Recoverable error: Group 7 bit 24 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR0_RERR_HIER_POS 7
+#define _BGP_IC_DDR0_RERR_UNIT_NUM 7
+#define _BGP_IC_DDR0_RERR_UNIT_POS 24
+#define _BGP_IC_DDR0_RERR_UNIT_SIZE 1
+#define _BGP_IC_DDR0_RERR_UNIT_MASK 0x00000080
+
+/* ************************************************************************* */
+/* DDR 0 Special Attention: Group 7 bit 25 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR0_SATT_HIER_POS 7
+#define _BGP_IC_DDR0_SATT_UNIT_NUM 7
+#define _BGP_IC_DDR0_SATT_UNIT_POS 25
+#define _BGP_IC_DDR0_SATT_UNIT_SIZE 1
+#define _BGP_IC_DDR0_SATT_UNIT_MASK 0x00000040
+
+/* ************************************************************************* */
+/* DDR 0 Machine Check: Group 7 bit 26 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR0_MCHK_HIER_POS 7
+#define _BGP_IC_DDR0_MCHK_UNIT_NUM 7
+#define _BGP_IC_DDR0_MCHK_UNIT_POS 26
+#define _BGP_IC_DDR0_MCHK_UNIT_SIZE 1
+#define _BGP_IC_DDR0_MCHK_UNIT_MASK 0x00000020
+
+
+/* ************************************************************************* */
+/* DDR 1 Recoverable error: Group 7 bit 27 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR1_RERR_HIER_POS 7
+#define _BGP_IC_DDR1_RERR_UNIT_NUM 7
+#define _BGP_IC_DDR1_RERR_UNIT_POS 27
+#define _BGP_IC_DDR1_RERR_UNIT_SIZE 1
+#define _BGP_IC_DDR1_RERR_UNIT_MASK 0x00000010
+
+/* ************************************************************************* */
+/* DDR 1 Special Attention: Group 7 bit 28 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR1_SATT_HIER_POS 7
+#define _BGP_IC_DDR1_SATT_UNIT_NUM 7
+#define _BGP_IC_DDR1_SATT_UNIT_POS 28
+#define _BGP_IC_DDR1_SATT_UNIT_SIZE 1
+#define _BGP_IC_DDR1_SATT_UNIT_MASK 0x00000008
+
+/* ************************************************************************* */
+/* DDR 1 Machine Check: Group 7 bit 29 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DDR1_MCHK_HIER_POS 7
+#define _BGP_IC_DDR1_MCHK_UNIT_NUM 7
+#define _BGP_IC_DDR1_MCHK_UNIT_POS 29
+#define _BGP_IC_DDR1_MCHK_UNIT_SIZE 1
+#define _BGP_IC_DDR1_MCHK_UNIT_MASK 0x00000004
+
+
+/* ************************************************************************* */
+/* Test Interface interrupt request: Group 7 bits 30:31                      */
+/* ************************************************************************* */
+
+#define _BGP_IC_TESTINT_HIER_POS 7
+#define _BGP_IC_TESTINT_UNIT_NUM 7
+#define _BGP_IC_TESTINT_UNIT_POS 30
+#define _BGP_IC_TESTINT_UNIT_SIZE 2
+#define _BGP_IC_TESTINT_UNIT_MASK 0x00000003
+
+
+/* ************************************************************************* */
+/* Ethernet TOMAL interrupt request: Group 8 bits 0:1 */
+/* ************************************************************************* */
+
+#define _BGP_IC_TOMAL_HIER_POS 8
+#define _BGP_IC_TOMAL_UNIT_NUM 8
+#define _BGP_IC_TOMAL_UNIT_POS 0
+#define _BGP_IC_TOMAL_UNIT_SIZE 2
+#define _BGP_IC_TOMAL_UNIT_MASK 0xC0000000
+
+
+
+/* ************************************************************************* */
+/* Ethernet XEMAC interrupt request: Group 9 bits 0 */
+/* ************************************************************************* */
+
+#define _BGP_IC_XEMAC_HIER_POS 9
+#define _BGP_IC_XEMAC_UNIT_NUM 9
+#define _BGP_IC_XEMAC_UNIT_POS 0
+#define _BGP_IC_XEMAC_UNIT_SIZE 1
+#define _BGP_IC_XEMAC_UNIT_MASK 0x80000000
+
+/* ************************************************************************* */
+/* Ethernet interrupt request: Group 9 bits 1 */
+/* ************************************************************************* */
+
+#define _BGP_IC_ETH_HIER_POS 9
+#define _BGP_IC_ETH_UNIT_NUM 9
+#define _BGP_IC_ETH_UNIT_POS 1
+#define _BGP_IC_ETH_UNIT_SIZE 1
+#define _BGP_IC_ETH_UNIT_MASK 0x40000000
+
+/* ************************************************************************* */
+/* Ethernet XENPAK interrupt request: Group 9 bits 2 */
+/* ************************************************************************* */
+
+#define _BGP_IC_XENPAK_HIER_POS 9
+#define _BGP_IC_XENPAK_UNIT_NUM 9
+#define _BGP_IC_XENPAK_UNIT_POS 2
+#define _BGP_IC_XENPAK_UNIT_SIZE 1
+#define _BGP_IC_XENPAK_UNIT_MASK 0x20000000
+
+
+
+
+#endif
diff --git a/arch/powerpc/include/common/alignment.h b/arch/powerpc/include/common/alignment.h
new file mode 100644
index 00000000000000..10bfd376734dfa
--- /dev/null
+++ b/arch/powerpc/include/common/alignment.h
@@ -0,0 +1,66 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/alignment.h
+ */
+
+#ifndef _ALIGNMENT_H_ /* Prevent multiple inclusion */
+#define _ALIGNMENT_H_
+
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#if defined(__ASSEMBLY__)
+
+#define ALIGN_L1_DIRTYBIT 3
+#define ALIGN_QUADWORD 4
+#define ALIGN_L1_CACHE 5
+#define ALIGN_L1I_CACHE 5
+#define ALIGN_L1D_CACHE 5
+#define ALIGN_L3_CACHE 7
+
+#elif defined(__GNUC__) || defined(__xlC__)
+
+#define ALIGN_L1_DIRTYBIT __attribute__ ((aligned ( 8)))
+#define ALIGN_QUADWORD __attribute__ ((aligned ( 16)))
+#define ALIGN_L1_CACHE __attribute__ ((aligned ( 32)))
+#define ALIGN_L1I_CACHE __attribute__ ((aligned ( 32)))
+#define ALIGN_L1D_CACHE __attribute__ ((aligned ( 32)))
+#define ALIGN_L3_CACHE __attribute__ ((aligned (128)))
+
+#else
+
+#warning "Need alignment directives for your compiler!"
+
+#define ALIGN_QUADWORD
+#define ALIGN_L1_CACHE
+#define ALIGN_L1I_CACHE
+#define ALIGN_L1D_CACHE
+#define ALIGN_L3_CACHE
+
+#endif /* __ASSEMBLY__ */
+
+__END_DECLS
+
+
+
+#endif /* Add nothing below this line */
diff --git a/arch/powerpc/include/common/bgp_bitnumbers.h b/arch/powerpc/include/common/bgp_bitnumbers.h
new file mode 100644
index 00000000000000..8a0b2bce175129
--- /dev/null
+++ b/arch/powerpc/include/common/bgp_bitnumbers.h
@@ -0,0 +1,113 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/bgp_bitnumbers.h
+ */
+
+#ifndef _BGL_BITNUMBERS_H_ /* Prevent multiple inclusion */
+#define _BGL_BITNUMBERS_H_
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+/* These defines allow use of IBM's bit numbering (MSb=0, LSb=31) for multi-bit fields */
+/* b = IBM bit number of the least significant bit (highest number) */
+/* x = value to set in field */
+/* s = size */
+#define _BS(b,x,s)( ( ( x) & ( 0x7FFFFFFF>> ( 31- ( s)))) << ( 31- ( b)))
+#define _BG(b,x,s)( ( _BS(b,0x7FFFFFFF,s) & x ) >> (31-b) )
+#define _BS64(b,x,s)( ( ( x) & ( 0x7FFFFFFFFFFFFFFFLL>> ( 63- ( s)))) << ( 63- ( b)))
+#define _BG64(b,x,s)( ( _BS64(b, 0x7FFFFFFFFFFFFFFFLL,s) & x ) >> (63-b) )
+#define _BN(b) ((1<<(31-(b))))
+#define _B1(b,x) (((x)&0x1)<<(31-(b)))
+#define _B2(b,x) (((x)&0x3)<<(31-(b)))
+#define _B3(b,x) (((x)&0x7)<<(31-(b)))
+#define _B4(b,x) (((x)&0xF)<<(31-(b)))
+#define _B5(b,x) (((x)&0x1F)<<(31-(b)))
+#define _B6(b,x) (((x)&0x3F)<<(31-(b)))
+#define _B7(b,x) (((x)&0x7F)<<(31-(b)))
+#define _B8(b,x) (((x)&0xFF)<<(31-(b)))
+#define _B9(b,x) (((x)&0x1FF)<<(31-(b)))
+#define _B10(b,x) (((x)&0x3FF)<<(31-(b)))
+#define _B11(b,x) (((x)&0x7FF)<<(31-(b)))
+#define _B12(b,x) (((x)&0xFFF)<<(31-(b)))
+#define _B13(b,x) (((x)&0x1FFF)<<(31-(b)))
+#define _B14(b,x) (((x)&0x3FFF)<<(31-(b)))
+#define _B15(b,x) (((x)&0x7FFF)<<(31-(b)))
+#define _B16(b,x) (((x)&0xFFFF)<<(31-(b)))
+#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b)))
+#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b)))
+#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b)))
+#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b)))
+#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b)))
+#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b)))
+#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b)))
+#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b)))
+#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b)))
+#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b)))
+#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b)))
+#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b)))
+#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b)))
+#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b)))
+#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b)))
+
+#ifndef __ASSEMBLY__
+
+/* These defines ease extraction of bitfields. (Not useful in assembler code.) */
+/* x = 32 bit value */
+/* b = IBM bit number of least significant bit of field */
+/* when b is a const, compiler should generate a single rotate-and-mask instruction */
+#define _GN(x,b) (((x)>>(31-(b)))&0x1)
+#define _G2(x,b) (((x)>>(31-(b)))&0x3)
+#define _G3(x,b) (((x)>>(31-(b)))&0x7)
+#define _G4(x,b) (((x)>>(31-(b)))&0xF)
+#define _G5(x,b) (((x)>>(31-(b)))&0x1F)
+#define _G6(x,b) (((x)>>(31-(b)))&0x3F)
+#define _G7(x,b) (((x)>>(31-(b)))&0x7F)
+#define _G8(x,b) (((x)>>(31-(b)))&0xFF)
+#define _G9(x,b) (((x)>>(31-(b)))&0x1FF)
+#define _G10(x,b) (((x)>>(31-(b)))&0x3FF)
+#define _G11(x,b) (((x)>>(31-(b)))&0x7FF)
+#define _G12(x,b) (((x)>>(31-(b)))&0xFFF)
+#define _G13(x,b) (((x)>>(31-(b)))&0x1FFF)
+#define _G14(x,b) (((x)>>(31-(b)))&0x3FFF)
+#define _G15(x,b) (((x)>>(31-(b)))&0x7FFF)
+#define _G16(x,b) (((x)>>(31-(b)))&0xFFFF)
+#define _G17(x,b) (((x)>>(31-(b)))&0x1FFFF)
+#define _G18(x,b) (((x)>>(31-(b)))&0x3FFFF)
+#define _G19(x,b) (((x)>>(31-(b)))&0x7FFFF)
+#define _G20(x,b) (((x)>>(31-(b)))&0xFFFFF)
+#define _G21(x,b) (((x)>>(31-(b)))&0x1FFFFF)
+#define _G22(x,b) (((x)>>(31-(b)))&0x3FFFFF)
+#define _G23(x,b) (((x)>>(31-(b)))&0x7FFFFF)
+#define _G24(x,b) (((x)>>(31-(b)))&0xFFFFFF)
+#define _G25(x,b) (((x)>>(31-(b)))&0x1FFFFFF)
+#define _G26(x,b) (((x)>>(31-(b)))&0x3FFFFFF)
+#define _G27(x,b) (((x)>>(31-(b)))&0x7FFFFFF)
+#define _G28(x,b) (((x)>>(31-(b)))&0xFFFFFFF)
+#define _G29(x,b) (((x)>>(31-(b)))&0x1FFFFFFF)
+#define _G30(x,b) (((x)>>(31-(b)))&0x3FFFFFFF)
+#define _G31(x,b) (((x)>>(31-(b)))&0x7FFFFFFF)
+
+#endif /* __ASSEMBLY__ */
+
+__END_DECLS
+
+#endif /* Add nothing below this line. */
diff --git a/arch/powerpc/include/common/bgp_chipversion.h b/arch/powerpc/include/common/bgp_chipversion.h
new file mode 100644
index 00000000000000..eba213a720a0b3
--- /dev/null
+++ b/arch/powerpc/include/common/bgp_chipversion.h
@@ -0,0 +1,52 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/bgp_chipversion.h
+ */
+
+#ifndef _BGP_CHIPVERSION_H_ /* Prevent multiple inclusion */
+#define _BGP_CHIPVERSION_H_
+
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#define BGP_CHIPVERSION_DD2
+
+#if defined BGP_CHIPVERSION_DD1
+/* Settings for DD1 */
+#define BGP_DD1_WORKAROUNDS 1
+
+#elif defined BGP_CHIPVERSION_DD2
+/* Settings for DD2 */
+
+#else
+/* */
+#error "Invalid chip version setting"
+
+#endif
+
+
+__END_DECLS
+
+
+
+#endif /* Add nothing below this line. */
diff --git a/arch/powerpc/include/common/bgp_personality.h b/arch/powerpc/include/common/bgp_personality.h
new file mode 100644
index 00000000000000..9d64516c8a9ede
--- /dev/null
+++ b/arch/powerpc/include/common/bgp_personality.h
@@ -0,0 +1,786 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/bgp_personality.h
+ */
+
+#ifndef _BGP_PERSONALITY_H_ /* Prevent multiple inclusion */
+#define _BGP_PERSONALITY_H_
+
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#include <common/bgp_chipversion.h>
+#include <common/alignment.h>
+#include <common/bgp_bitnumbers.h>
+#include <bpcore/bgp_types.h>
+
+/* */
+/* I/O Node Linux currently hard-codes the personality address. */
+/* */
+#define _BGP_HARDCODED_PERSONALITY_SRAM_ADDRESS (0xFFFFF800)
+
+#define _BGP_PERSONALITY_VERSION (0x0A)
+
+#define _BGP_DEFAULT_FREQ (850) /* Match the current DD2 hardware */
+
+#define _BGP_PERS_Unused_DEFAULT 0
+
+#define _BGP_PERS_PROCESSCONFIG_DIAGS (0xFF000000) /* Diagnostic Mode: All Cores Enabled and Privileged in Process 0 */
+#define _BGP_PERS_PROCESSCONFIG_SMP (0x0F000000) /* All Cores Enabled User-Space in Process 0 */
+#define _BGP_PERS_PROCESSCONFIG_VNM (0x08040201) /* 4 Single-Core Processes (a.k.a. Virtual Nodes) */
+#define _BGP_PERS_PROCESSCONFIG_2x2 (0x0C030000) /* 2 Processes of 2 Cores each in same DP unit */
+#define _BGP_PERS_PROCESSCONFIG_DEFAULT (_BGP_PERS_PROCESSCONFIG_DIAGS)
+#define _BGP_PERS_PROCESSCONFIG_PRIV_MSK (0xF0F0F0F0) /* Mask to isolate privileged core flags */
+
+
+/* Personality.Kernel_Config.RASPolicy */
+#define _BGP_PERS_RASPOLICY_VERBOSITY(x) _B2( 1,x) /* Verbosity as shown below */
+#define _BGP_PERS_RASPOLICY_MINIMAL _BGP_PERS_RASPOLICY_VERBOSITY(0) /* Benchmarking Level of Capture and Reporting */
+#define _BGP_PERS_RASPOLICY_NORMAL _BGP_PERS_RASPOLICY_VERBOSITY(1) /* Normal Production Level of Capture and Reporting */
+#define _BGP_PERS_RASPOLICY_VERBOSE _BGP_PERS_RASPOLICY_VERBOSITY(2) /* Manufacturing Test and Diagnostics */
+#define _BGP_PERS_RASPOLICY_EXTREME _BGP_PERS_RASPOLICY_VERBOSITY(3) /* Report Every Event Immediately - Thresholds set to 1 */
+#define _BGP_PERS_RASPOLICY_FATALEXIT _BN( 2) /* Fatal is Fatal, so exit. */
+
+#define _BGP_PERS_RASPOLICY_DEFAULT (_BGP_PERS_RASPOLICY_VERBOSE | _BGP_PERS_RASPOLICY_FATALEXIT)
+
+
+#define _BGP_PERSONALITY_LEN_NFSDIR (32) /* 32bytes */
+
+#define _BGP_PERSONALITY_LEN_SECKEY (32) /* 32bytes */
+
+/* Personality.NodeConfig Driver Enables and Configurations */
+#define _BGP_PERS_ENABLE_Simulation _BN( 0) /* Running on VHDL Simulation */
+#define _BGP_PERS_ENABLE_LockBox _BN( 1)
+#define _BGP_PERS_ENABLE_BIC _BN( 2)
+#define _BGP_PERS_ENABLE_DDR _BN( 3) /* DDR Controllers (not Fusion DDR model) */
+#define _BGP_PERS_ENABLE_LoopBack _BN( 4) /* LoopBack: Internal TS/TR or SerDes Loopback */
+#define _BGP_PERS_ENABLE_GlobalInts _BN( 5)
+#define _BGP_PERS_ENABLE_Collective _BN( 6) /* Enable Collective Network */
+#define _BGP_PERS_ENABLE_Torus _BN( 7)
+#define _BGP_PERS_ENABLE_TorusMeshX _BN( 8) /* Torus is a Mesh in the X-dimension */
+#define _BGP_PERS_ENABLE_TorusMeshY _BN( 9) /* Torus is a Mesh in the Y-dimension */
+#define _BGP_PERS_ENABLE_TorusMeshZ _BN(10) /* Torus is a Mesh in the Z-dimension */
+#define _BGP_PERS_ENABLE_TreeA _BN(11) /* Enable Collective Network A-link */
+#define _BGP_PERS_ENABLE_TreeB _BN(12) /* Enable Collective Network B-link */
+#define _BGP_PERS_ENABLE_TreeC _BN(13) /* Enable Collective Network C-link */
+#define _BGP_PERS_ENABLE_DMA _BN(14)
+#define _BGP_PERS_ENABLE_SerDes _BN(15)
+#define _BGP_PERS_ENABLE_UPC _BN(16)
+#define _BGP_PERS_ENABLE_EnvMon _BN(17)
+#define _BGP_PERS_ENABLE_Ethernet _BN(18)
+#define _BGP_PERS_ENABLE_JTagLoader _BN(19) /* Converse with JTag Host to load kernel */
+#define _BGP_PERS_ENABLE_MailBoxReceive _BGP_PERS_ENABLE_JTagLoader
+#define _BGP_PERS_ENABLE_PowerSave _BN(20) /* Turn off unused devices (Eth on CN, TS on ION) */
+#define _BGP_PERS_ENABLE_FPU _BN(21) /* Enable Double-Hummers (not supported in EventSim) */
+#define _BGP_PERS_ENABLE_StandAlone _BN(22) /* Disable "CIOD" interface, Requires Collective! */
+#define _BGP_PERS_ENABLE_TLBMisses _BN(23) /* TLB Misses vs Wasting Memory (see bgp_AppSetup.c) */
+#define _BGP_PERS_ENABLE_Mambo _BN(24) /* Running under Mambo? Used by Linux */
+#define _BGP_PERS_ENABLE_TreeBlast _BN(25) /* Enable Tree "Blast" mode */
+#define _BGP_PERS_ENABLE_BlindStacks _BN(26) /* For "XB" Tests, Lock 16K Stacks in Blind Device */
+#define _BGP_PERS_ENABLE_CNK_Malloc _BN(27) /* Enable Malloc Support in CNK. */
+#define _BGP_PERS_ENABLE_Reproducibility _BN(28) /* Enable Cycle Reproducibility */
+#define _BGP_PERS_ENABLE_HighThroughput _BN(29) /* Enable high throughput computing mode */
+#define _BGP_PERS_ENABLE_DiagnosticsMode _BN(30) /* Enable diagnostics mode */
+
+/* Configure L1+L2 into BG/L Mode (s/w managed L1 coherence, write-back) */
+/* This overrides most L1, L2, and Snoop settings. Careful!                   */
+#define _BGP_PERS_ENABLE_BGLMODE _BN(31) /* (not yet fully implemented) */
+
+/* Default Setup for Simulation: Torus Meshes, DMA, SerDes, Ethernet, JTagLoader, PowerSave */
+
+#define _BGP_PERS_NODECONFIG_DEFAULT (_BGP_PERS_ENABLE_Simulation |\
+ _BGP_PERS_ENABLE_LockBox |\
+ _BGP_PERS_ENABLE_BIC |\
+ _BGP_PERS_ENABLE_DDR |\
+ _BGP_PERS_ENABLE_LoopBack |\
+ _BGP_PERS_ENABLE_GlobalInts |\
+ _BGP_PERS_ENABLE_Collective |\
+ _BGP_PERS_ENABLE_Torus |\
+ _BGP_PERS_ENABLE_UPC |\
+ _BGP_PERS_ENABLE_EnvMon |\
+ _BGP_PERS_ENABLE_FPU |\
+ _BGP_PERS_ENABLE_TLBMisses |\
+ _BGP_PERS_ENABLE_StandAlone)
+
+/* Default Setup for Hardware: */
+/* Supports Stand-Alone CNA Applications. */
+/* Bootloader-Extensions and XB's must turn-off JTagLoader */
+#define _BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE (_BGP_PERS_ENABLE_JTagLoader |\
+ _BGP_PERS_ENABLE_LockBox |\
+ _BGP_PERS_ENABLE_BIC |\
+ _BGP_PERS_ENABLE_DDR |\
+ _BGP_PERS_ENABLE_GlobalInts |\
+ _BGP_PERS_ENABLE_Collective |\
+ _BGP_PERS_ENABLE_SerDes |\
+ _BGP_PERS_ENABLE_UPC |\
+ _BGP_PERS_ENABLE_EnvMon |\
+ _BGP_PERS_ENABLE_FPU |\
+ _BGP_PERS_ENABLE_TLBMisses |\
+ _BGP_PERS_ENABLE_StandAlone)
+
+
+
+/* these fields are defined by the control system depending on compute/io node */
+/* _BGP_PERS_ENABLE_Torus | */
+/* _BGP_PERS_ENABLE_TorusMeshX | */
+/* _BGP_PERS_ENABLE_TorusMeshY | */
+/* _BGP_PERS_ENABLE_TorusMeshZ | */
+
+
+
+/* Personality.L1Config: Controls and Settings for L1 Cache */
+#define _BGP_PERS_L1CONFIG_L1I _BN( 0) /* L1 Enabled for Instructions */
+#define _BGP_PERS_L1CONFIG_L1D _BN( 1) /* L1 Enabled for Data */
+#define _BGP_PERS_L1CONFIG_L1SWOA _BN( 2) /* L1 Store WithOut Allocate */
+#define _BGP_PERS_L1CONFIG_L1Recovery _BN( 3) /* L1 Full Recovery Mode */
+#define _BGP_PERS_L1CONFIG_L1WriteThru _BN( 4) /* L1 Write-Thru (not svc_host changeable (yet?)) */
+#define _BGP_PERS_L1CONFIG_DO_L1ITrans _BN( 5) /* Enable L1 Instructions Transient? */
+#define _BGP_PERS_L1CONFIG_DO_L1DTrans _BN( 6) /* Enable L1 Data Transient? */
+ /* unused 9bits: 7..15 */
+#define _BGP_PERS_L1CONFIG_L1ITrans(x) _B8(23,x) /* L1 Transient for Instructions in Groups of 16 Lines */
+#define _BGP_PERS_L1CONFIG_L1DTrans(x) _B8(31,x) /* L1 Transient for Data in Groups of 16 Lines */
+
+#define _BGP_PERS_L1CONFIG_DEFAULT (_BGP_PERS_L1CONFIG_L1I |\
+ _BGP_PERS_L1CONFIG_L1D |\
+ _BGP_PERS_L1CONFIG_L1SWOA |\
+ _BGP_PERS_L1CONFIG_L1Recovery |\
+ _BGP_PERS_L1CONFIG_L1WriteThru)
+
+typedef union T_BGP_Pers_L1Cfg
+ {
+ uint32_t l1cfg;
+ struct {
+ unsigned l1i : 1;
+ unsigned l1d : 1;
+ unsigned l1swoa : 1;
+ unsigned l1recovery : 1;
+ unsigned l1writethru : 1;
+ unsigned do_l1itrans : 1;
+ unsigned do_l1dtrans : 1;
+ unsigned l1rsvd : 9;
+ unsigned l1itrans : 8;
+ unsigned l1dtrans : 8;
+ };
+ }
+ _BGP_Pers_L1Cfg;
+
+/* Personality.L2Config: Controls and Settings for L2 and Snoop */
+#define _BGP_PERS_L2CONFIG_L2I _BN( 0) /* L2 Instruction Caching Enabled */
+#define _BGP_PERS_L2CONFIG_L2D _BN( 1) /* L2 Data Caching Enabled */
+#define _BGP_PERS_L2CONFIG_L2PF _BN( 2) /* L2 Automatic Prefetching Enabled */
+#define _BGP_PERS_L2CONFIG_L2PFO _BN( 3) /* L2 Optimistic Prefetching Enabled */
+#define _BGP_PERS_L2CONFIG_L2PFA _BN( 4) /* L2 Aggressive Prefetching Enabled (fewer deeper streams) */
+#define _BGP_PERS_L2CONFIG_L2PFS _BN( 5) /* L2 Aggressive Many-Stream Prefetching Enabled (deeper only when available buffers) */
+#define _BGP_PERS_L2CONFIG_Snoop _BN( 6) /* Just NULL Snoop Filter */
+#define _BGP_PERS_L2CONFIG_SnoopCache _BN( 7) /* Snoop Caches */
+#define _BGP_PERS_L2CONFIG_SnoopStream _BN( 8) /* Snoop Stream Registers (Disable for BG/P Rit 1.0 due to PPC450 errata) */
+#define _BGP_PERS_L2CONFIG_SnoopRange _BN( 9) /* Snoop Range Filter when possible */
+#define _BGP_PERS_L2CONFIG_BUG824LUMPY _BN(10) /* BPC_BUGS 824: Fix with Lumpy Performance */
+#define _BGP_PERS_L2CONFIG_BUG824SMOOTH _BN(11) /* BPC_BUGS 824: Fix with Smooth Performance, but -12% Memory */
+#define _BGP_PERS_L2CONFIG_NONCOHERENT_STACKS _BN(12) /* Special for Snoop diagnostics. See bgp_vmm.c */
+ /* additional bits may be used for Snoop setting tweaks */
+
+/* Default L2 Configuration: */
+/* L2 Enabled with Multi-Stream Aggressive Prefetching */
+/* Snoop Enabled with all filters except Range */
+#define _BGP_PERS_L2CONFIG_DEFAULT (_BGP_PERS_L2CONFIG_L2I |\
+ _BGP_PERS_L2CONFIG_L2D |\
+ _BGP_PERS_L2CONFIG_L2PF |\
+ _BGP_PERS_L2CONFIG_L2PFO |\
+ _BGP_PERS_L2CONFIG_L2PFS |\
+ _BGP_PERS_L2CONFIG_Snoop |\
+ _BGP_PERS_L2CONFIG_SnoopCache |\
+ _BGP_PERS_L2CONFIG_SnoopStream)
+
+/* Personality.L3Config: Controls and Settings for L3 */
+/* Note: Most bits match _BGP_L3x_CTRL DCRs. */
+/* See arch/include/bpcore/bgl_l3_dcr.h */
+#define _BGP_PERS_L3CONFIG_L3I _BN( 0) /* L3 Enabled for Instructions */
+#define _BGP_PERS_L3CONFIG_L3D _BN( 1) /* L3 Enabled for Data */
+#define _BGP_PERS_L3CONFIG_L3PFI _BN( 2) /* Inhibit L3 Prefetch from DDR */
+#define _BGP_PERS_L3CONFIG_DO_Scratch _BN( 3) /* Set up Scratch? */
+#define _BGP_PERS_L3CONFIG_DO_PFD0 _BN( 4) /* Adjust PFD0? */
+#define _BGP_PERS_L3CONFIG_DO_PFD1 _BN( 5) /* Adjust PFD1? */
+#define _BGP_PERS_L3CONFIG_DO_PFDMA _BN( 6) /* Adjust PFDMA? */
+#define _BGP_PERS_L3CONFIG_DO_PFQD _BN( 7) /* Adjust PFQD? */
+ /* 8..15 unused/available */
+#define _BGP_PERS_L3CONFIG_Scratch(x) _B4(19,x) /* Scratch 8ths: 0..8 */
+#define _BGP_PERS_L3CONFIG_PFD0(x) _B3(22,x) /* Prefetch Depth for DP0 */
+#define _BGP_PERS_L3CONFIG_PFD1(x) _B3(25,x) /* Prefetch Depth for DP1 */
+#define _BGP_PERS_L3CONFIG_PFDMA(x) _B3(28,x) /* Prefetch Depth for DMA */
+#define _BGP_PERS_L3CONFIG_PFQD(x) _B3(31,x) /* Prefetch Queue Depth */
+
+/* General L3 Configuration */
+typedef union T_BGP_Pers_L3Cfg
+ {
+ uint32_t l3cfg;
+ struct {
+ unsigned l3i : 1;
+ unsigned l3d : 1;
+ unsigned l3pfi : 1;
+ unsigned do_scratch : 1;
+ unsigned do_pfd0 : 1;
+ unsigned do_pfd1 : 1;
+ unsigned do_pfdma : 1;
+ unsigned do_pfqd : 1;
+ unsigned rsvd : 8;
+ unsigned scratch : 4;
+ unsigned pfd0 : 3;
+ unsigned pfd1 : 3;
+ unsigned pfdma : 3;
+ unsigned pfqd : 3;
+ };
+ }
+ _BGP_Pers_L3Cfg;
+
+/* Default L3 Configuration: */
+/* L3 Enabled for Instructions and Data */
+/* No Prefetch Depth overrides, No Scratch, No Scrambling. */
+#define _BGP_PERS_L3CONFIG_DEFAULT (_BGP_PERS_L3CONFIG_L3I |\
+ _BGP_PERS_L3CONFIG_L3D |\
+ _BGP_PERS_L3CONFIG_DO_PFDMA |\
+ _BGP_PERS_L3CONFIG_PFDMA(4))
+
+
+/* L3 Cache and Bank Selection, and prefetching tweaks (Recommended for Power-Users) */
+#define _BGP_PERS_L3SELECT_DO_CacheSel _BN( 0) /* Adjust Cache Select setting? */
+#define _BGP_PERS_L3SELECT_DO_BankSel _BN( 1) /* Adjust Bank Select setting? */
+#define _BGP_PERS_L3SELECT_Scramble _BN( 2) /* L3 Scramble */
+#define _BGP_PERS_L3SELECT_PFby2 _BN( 3) /* Prefetch by 2 if set, else by 1 (default) if clear. */
+#define _BGP_PERS_L3SELECT_CacheSel(x) _B5( 8,x) /* PhysAddr Bit for L3 Selection (0..26) */
+#define _BGP_PERS_L3SELECT_BankSel(x) _B5(13,x) /* PhysAddr Bit for L3 Bank Selection (0..26) Must be > CacheSel. */
+
+typedef union T_BGP_Pers_L3Select
+ {
+ uint32_t l3select;
+ struct {
+ unsigned do_CacheSel : 1;
+ unsigned do_BankSel : 1;
+ unsigned l3Scramble : 1;
+ unsigned l3_PF_by2 : 1; /* default is PreFetch by 1. */
+ unsigned CacheSel : 5; /* Physical Address Bit for L3 Selection (0..26) */
+ unsigned BankSel : 5; /* 0..26 Must be strictly greater than CacheSel. */
+ unsigned rsvd : 18;
+ };
+ }
+ _BGP_Pers_L3Select;
+
+/* Default L3 Selection Configuration: Disable overrides, but set h/w default values. */
+#define _BGP_PERS_L3SELECT_DEFAULT (_BGP_PERS_L3SELECT_CacheSel(21) |\
+ _BGP_PERS_L3SELECT_BankSel(26))
+
+/* Tracing Masks and default trace configuration */
+/* See also arch/include/cnk/Trace.h */
+#define _BGP_TRACE_CONFIG _BN( 0) /* Display Encoded personality config on startup */
+#define _BGP_TRACE_ENTRY _BN( 1) /* Function enter and exit */
+#define _BGP_TRACE_INTS _BN( 2) /* Standard Interrupt Dispatch */
+#define _BGP_TRACE_CINTS _BN( 3) /* Critical Interrupt Dispatch */
+#define _BGP_TRACE_MCHK _BN( 4) /* Machine Check Dispatch */
+#define _BGP_TRACE_SYSCALL _BN( 5) /* System Calls */
+#define _BGP_TRACE_VMM _BN( 6) /* Virtual Memory Manager */
+#define _BGP_TRACE_DEBUG _BN( 7) /* Debug Events (app crashes etc) */
+#define _BGP_TRACE_TORUS _BN( 8) /* Torus Init */
+#define _BGP_TRACE_TREE _BN( 9) /* Tree Init */
+#define _BGP_TRACE_GLOBINT _BN(10) /* Global Interrupts */
+#define _BGP_TRACE_DMA _BN(11) /* DMA Setup */
+#define _BGP_TRACE_SERDES _BN(12) /* SerDes Init */
+#define _BGP_TRACE_TESTINT _BN(13) /* Test Interface, ECID, Config */
+#define _BGP_TRACE_ETHTX _BN(14) /* Ethernet Transmit */
+#define _BGP_TRACE_ETHRX _BN(15) /* Ethernet Receive */
+#define _BGP_TRACE_POWER _BN(16) /* Power Control */
+#define _BGP_TRACE_PROCESS _BN(17) /* Process/Thread Mapping */
+#define _BGP_TRACE_EXIT_SUM _BN(18) /* Report Per-Core Interrupt and Error Summary on exit() */
+#define _BGP_TRACE_SCHED _BN(19) /* Report Scheduler Information */
+#define _BGP_TRACE_RAS _BN(20) /* Report RAS Events (in addition to sending to Host) */
+#define _BGP_TRACE_ECID _BN(21) /* Report UCI and ECID on boot */
+#define _BGP_TRACE_FUTEX _BN(22) /* Trace Futex operations */
+#define _BGP_TRACE_MemAlloc _BN(23) /* Trace MMAP and Shared Memory operations */
+#define _BGP_TRACE_CONTROL _BN(24) /* Trace control messages exchanged with I/O node */
+#define _BGP_TRACE_MSGS _BN(25) /* Trace messages and packets sent on virtual channel 0 */
+#define _BGP_TRACE_DEBUGGER _BN(26) /* Trace debugger messages exchanged with I/O node */
+#define _BGP_TRACE_WARNINGS _BN(30) /* Trace Warnings */
+#define _BGP_TRACE_VERBOSE _BN(31) /* Verbose Tracing Modifier */
+
+/* Enable tracking of Regression Suite coverage and report UCI+ECID on boot */
+#define _BGP_PERS_TRACE_DEFAULT 0
+/* (_BGP_TRACE_CONFIG | _BGP_TRACE_ECID) */
+
+
+typedef struct _BGP_Personality_Kernel_t
+ {
+ uint32_t UniversalComponentIdentifier; /* see include/common/bgp_ras.h */
+
+ uint32_t FreqMHz; /* Clock_X1 Frequency in MegaHertz (eg 1000) */
+
+ uint32_t RASPolicy; /* Verbosity level, and other RAS Reporting Controls */
+
+ /* Process Config: */
+ /* Each byte represents a process (1 to 4 processes supported) */
+ /* No core can be assigned to more than 1 process. */
+ /* Cores assigned to no process are disabled. */
+ /* Cores within a process share the same address space. */
+ /* Separate processes have distinct address spaces. */
+ /* Within each process (0 to 4 cores assigned to a process): */
+ /* Lower nibble is bitmask of which core belongs to that process. */
+ /* Upper nibble is bitmask whether that thread is privileged or user. */
+ /* Processes with zero cores do not exist. */
+ /* E.g., for Diagnostics, we sometimes use 0xFF000000, which means */
+ /* that all 4 cores run privileged in process 0. */
+ uint32_t ProcessConfig;
+
+ uint32_t TraceConfig; /* Kernel Tracing Enables */
+ uint32_t NodeConfig; /* Kernel Driver Enables */
+ uint32_t L1Config; /* L1 Config and setup controls */
+ uint32_t L2Config; /* L2 and Snoop Config and setup controls */
+ uint32_t L3Config; /* L3 Config and setup controls */
+ uint32_t L3Select; /* L3 Cache and Bank Selection controls */
+
+ uint32_t SharedMemMB; /* Memory to Reserve for Sharing among Processes */
+
+ uint32_t ClockStop0; /* Upper 11Bits of ClockStop, enabled if Non-zero */
+ uint32_t ClockStop1; /* Lower 32Bits of ClockStop, enabled if Non-zero */
+ }
+ _BGP_Personality_Kernel_t;
+
+
+/* Defaults for DDR Config */
+#define _BGP_PERS_DDR_PBX0_DEFAULT (0x411D1512) /* PBX DCRs setting (in IBM bit numbering) */
+#define _BGP_PERS_DDR_PBX1_DEFAULT (0x40000000) /* PBX DCRs setting (in IBM bit numbering) */
+#define _BGP_PERS_DDR_MemConfig0_DEFAULT (0x81fc4080) /* MemConfig */
+#define _BGP_PERS_DDR_MemConfig1_DEFAULT (0x0C0ff800) /* MemConfig */
+#define _BGP_PERS_DDR_ParmCtl0_DEFAULT (0x3216c008) /* Parm Control */
+#define _BGP_PERS_DDR_ParmCtl1_DEFAULT (0x4168c323) /* Parm Control */
+#define _BGP_PERS_DDR_MiscCtl0_DEFAULT (0) /* Misc. Control */
+#define _BGP_PERS_DDR_MiscCtl1_DEFAULT (0) /* Misc. Control */
+#define _BGP_PERS_DDR_CmdBufMode0_DEFAULT (0x00400fdf) /* Command Buffer Mode */
+#define _BGP_PERS_DDR_CmdBufMode1_DEFAULT (0xffc80600) /* Command Buffer Mode */
+#define _BGP_PERS_DDR_RefrInterval0_DEFAULT (0xD1000002) /* Refresh Interval */
+#define _BGP_PERS_DDR_RefrInterval1_DEFAULT (0x04000000) /* Refresh Interval */
+#define _BGP_PERS_DDR_ODTCtl0_DEFAULT (0) /* ODT Control */
+#define _BGP_PERS_DDR_ODTCtl1_DEFAULT (0) /* ODT Control */
+#define _BGP_PERS_DDR_TimingTweaks_DEFAULT (0) /* DRAM timing tweaks to use */
+#define _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT (0xa514c805) /* Data Strobe Calibration */
+#define _BGP_PERS_DDR_DQSCtl_DEFAULT (0x00000168) /* DQS Control */
+#define _BGP_PERS_DDR_Throttle_DEFAULT (0) /* DDR Throttle */
+
+#define _BGP_PERS_DDR_CAS_DEFAULT (4) /* CAS Latency (3, 4, or 5) */
+#define _BGP_PERS_DDR_DDRSizeMB_DEFAULT (2048) /* Total DDR size in MegaBytes (512MB - 16384MB). */
+#define _BGP_PERS_DDR_Chips_DEFAULT (0x01) /* Type of DDR chips: 512GBx8 */
+
+#define _BGP_PERS_DDRFLAGS_ENABLE_Scrub _BN(0) /* Enable DDR Slow Scrub when 1 */
+
+/* DDRFLAGS default: Enable Slow Scrub. */
+#define _BGP_PERS_DDRFLAGS_DEFAULT (_BGP_PERS_DDRFLAGS_ENABLE_Scrub)
+
+#define _BGP_PERS_SRBS0_DEFAULT (0xFFFFFFFF)
+#define _BGP_PERS_SRBS1_DEFAULT (0xFFFFFFFF)
+
+typedef struct _BGP_Personality_DDR_t
+ {
+ uint32_t DDRFlags; /* Misc. Flags and Settings */
+ uint32_t SRBS0; /* Controller 0 SRBS/CK Settings */
+ uint32_t SRBS1; /* Controller 1 SRBS/CK Settings */
+ uint32_t PBX0; /* PBX DCRs setting (in IBM bit numbering) */
+ uint32_t PBX1; /* PBX DCRs setting (in IBM bit numbering) */
+ uint32_t MemConfig0; /* MemConfig */
+ uint32_t MemConfig1; /* MemConfig */
+ uint32_t ParmCtl0; /* Parm Control */
+ uint32_t ParmCtl1; /* Parm Control */
+ uint32_t MiscCtl0; /* Misc. Control */
+ uint32_t MiscCtl1; /* Misc. Control */
+ uint32_t CmdBufMode0; /* Command Buffer Mode */
+ uint32_t CmdBufMode1; /* Command Buffer Mode */
+ uint32_t RefrInterval0; /* Refresh Interval */
+ uint32_t RefrInterval1; /* Refresh Interval */
+ uint32_t ODTCtl0; /* ODT Control */
+ uint32_t ODTCtl1; /* ODT Control */
+ uint8_t TimingTweaks; /* DRAM timing tweak type */
+ uint8_t Unused0;
+ uint8_t Unused1;
+ uint8_t Unused2;
+ uint32_t DataStrobeCalib1; /* Data Strobe Calibration */
+ uint32_t DQSCtl; /* DQS Control */
+ uint32_t Throttle; /* DDR Throttle */
+ uint16_t DDRSizeMB; /* Total DDR size in MegaBytes (512MB - 16384MB). */
+ uint8_t Chips; /* Type of DDR chips */
+ uint8_t CAS; /* CAS Latency (3, 4, or 5) */
+ }
+ _BGP_Personality_DDR_t;
+
+
+typedef struct _BGP_Personality_Networks_t
+ {
+ uint32_t BlockID; /* a.k.a. PartitionID */
+
+ uint8_t Xnodes,
+ Ynodes,
+ Znodes,
+ Xcoord,
+ Ycoord,
+ Zcoord;
+
+ /* PSet Support */
+ uint16_t PSetNum;
+ uint32_t PSetSize;
+ uint32_t RankInPSet;
+
+ uint32_t IOnodes;
+ uint32_t Rank; /* Rank in Block (or Partition) */
+ uint32_t IOnodeRank; /* Rank (and therefore P2P Addr) of my I/O Node */
+ uint16_t TreeRoutes[ 16 ];
+ }
+ _BGP_Personality_Networks_t;
+
+
+typedef struct _BGP_IP_Addr_t
+ {
+ /* IPv6 Addresses are 16 bytes, where the */
+ /* lower 4 (indices 12-15) can be used for IPv4 address. */
+ uint8_t octet[ 16 ];
+ }
+ _BGP_IP_Addr_t;
+
+
+typedef struct _BGP_Personality_Ethernet_t
+ {
+ uint16_t MTU; /* Initial emac MTU size */
+ uint8_t EmacID[6]; /* MAC address for emac */
+ _BGP_IP_Addr_t IPAddress; /* IPv6/IPv4 address of this node */
+ _BGP_IP_Addr_t IPNetmask; /* IPv6/IPv4 netmask */
+ _BGP_IP_Addr_t IPBroadcast; /* IPv6/IPv4 broadcast address */
+ _BGP_IP_Addr_t IPGateway; /* IPv6/IPv4 initial gateway (zero if none) */
+ _BGP_IP_Addr_t NFSServer; /* IPv6/IPv4 NFS system software server address */
+ _BGP_IP_Addr_t serviceNode; /* IPv6/IPv4 address of service node */
+
+ /* NFS mount info */
+ char NFSExportDir[_BGP_PERSONALITY_LEN_NFSDIR];
+ char NFSMountDir[ _BGP_PERSONALITY_LEN_NFSDIR];
+
+ /* Security Key for Service Node authentication */
+ uint8_t SecurityKey[ _BGP_PERSONALITY_LEN_SECKEY ];
+ }
+ _BGP_Personality_Ethernet_t;
+
+
+#define BGP_PERS_BLKCFG_IPOverCollective _BN(31)
+#define BGP_PERS_BLKCFG_IPOverTorus _BN(30)
+#define BGP_PERS_BLKCFG_IPOverCollectiveVC _BN(29)
+#define BGP_PERS_BLKCFG_CIOModeSel(x) _B2(28,x)
+#define BGP_PERS_BLKCFG_bgsysFSSel(x) _B3(26,x)
+#define BGP_PERS_BLKCFG_CIOMode_Full 0
+#define BGP_PERS_BLKCFG_CIOMode_MuxOnly 1
+#define BGP_PERS_BLKCFG_CIOMode_None 2
+#define BGP_PERS_BLKCFG_bgsys_NFSv3 0
+#define BGP_PERS_BLKCFG_bgsys_NFSv4 1
+#define BGP_PERS_BLKCFG_DEFAULT (BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full) | \
+ BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3))
+
+
+typedef struct T_BGP_Personality_t
+ {
+ uint16_t CRC;
+ uint8_t Version;
+ uint8_t PersonalitySizeWords;
+
+ _BGP_Personality_Kernel_t Kernel_Config;
+
+ _BGP_Personality_DDR_t DDR_Config;
+
+ _BGP_Personality_Networks_t Network_Config;
+
+ _BGP_Personality_Ethernet_t Ethernet_Config;
+
+ uint8_t Block_Config;
+ uint8_t padd[7]; /* Pad size to multiple of 16 bytes (== width of DEVBUS_DATA tdr) */
+ /* to simplify jtag operations. See issue #140. */
+ }
+ _BGP_Personality_t;
+
+#define Network_Config_treeInfo0 DDR_Config.ODTCtl0
+#define Network_Config_treeInfo1 DDR_Config.ODTCtl1
+#define Network_Config_treeInfo2 DDR_Config.CmdBufMode0
+
+/* _BGP_PersonalityTreeInfo provides information about one of the tree
+ * ports (A,B or C) on this node. It is a 32-bit value.
+ * See accessor methods below which interpret these fields with this layout:
+ *
+ * .-.-.--.--.--.------------------------.
+ * |V|R|LT|CW|DP| destP2Paddr |
+ * `-'-'--'--'--'------------------------'
+ * 1 1 2 2 2 24 <- bits in field
+ *
+ * V Valid bit. Use is deprecated. Was used for forward compatibility
+ * R Wire is redundant
+ * LT Link type (2 bit). 0->no wire, 1->compute node, 2->I/O node, 3->reserved
+ * CW CommWorld wire interpret (2 bit): 0->unused wire, 1->child, 2->parent
+ * DP Destination Port on other end of wire (2 bit) 0,1,2 -> A,B,C
+ * destP2Paddr (24 bit) Tree address of node on other end of the wire
+ */
+
+#define _BGP_PERS_TREEINFO_VALID 0x80000000
+#define _BGP_PERS_TREEINFO_REDUNDANT 0x40000000
+#define _BGP_PERS_TREEINFO_LINKTYPE_MASK 0x30000000
+#define _BGP_PERS_TREEINFO_LINKTYPE_SHIFT 28
+#define _BGP_PERS_TREEINFO_COMMWORLD_MASK 0x0c000000
+#define _BGP_PERS_TREEINFO_COMMWORLD_SHIFT 26
+#define _BGP_PERS_TREEINFO_DESTPORT_MASK 0x03000000
+#define _BGP_PERS_TREEINFO_DESTPORT_SHIFT 24
+#define _BGP_PERS_TREEINFO_DESTP2P_MASK 0x00ffffff
+
+#define _BGP_PERS_TREEINFO_LINKTYPE_NOWIRE 0
+#define _BGP_PERS_TREEINFO_LINKTYPE_COMPUTE 1
+#define _BGP_PERS_TREEINFO_LINKTYPE_IO 2
+
+#define _BGP_PERS_TREEINFO_COMMWORLD_UNUSED 0 /* unused wire */
+#define _BGP_PERS_TREEINFO_COMMWORLD_CHILD 1
+#define _BGP_PERS_TREEINFO_COMMWORLD_PARENT 2
+
+#define _BGP_PERS_TREE_PORT_0 0
+#define _BGP_PERS_TREE_PORT_1 1
+#define _BGP_PERS_TREE_PORT_2 2
+
+/* This struct is the layout on big endian architectures (ppc) */
+typedef struct {
+ unsigned valid:1; /* 1 -> this info is valid */
+ unsigned redundant:1; /* 1 -> redundant wire */
+ unsigned linkType:2; /* 0 -> no wire, 1 -> compute node, 2 -> I/O */
+ unsigned commWorld:2; /* 1 -> child port, 2 -> parent port on comm_world tree */
+ unsigned destPort:2; /* dest port 0,1,2 -> A,B,C */
+ unsigned destP2Paddr:24; /* destination tree addr on this port */
+} _BGP_PersonalityTreeInfo_t;
+
+
+
+/* Define a static initializer for default configuration. (DEFAULTS FOR SIMULATION) */
+/* This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c */
+#define _BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER { \
+ 0, /* CRC */ \
+ _BGP_PERSONALITY_VERSION, /* Version */ \
+ (sizeof(_BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+ { /* _BGP_Personality_Kernel_t: */ \
+ 0, /* UniversalComponentIdentifier */ \
+ _BGP_DEFAULT_FREQ, /* FreqMHz */ \
+ _BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \
+ _BGP_PERS_PROCESSCONFIG_DEFAULT, /* ProcessConfig */ \
+ _BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \
+ _BGP_PERS_NODECONFIG_DEFAULT, /* NodeConfig */ \
+ _BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \
+ _BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \
+ _BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \
+ _BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \
+ 0, /* SharedMemMB */ \
+ 0, /* ClockStop0 */ \
+ 0 /* ClockStop1 */ \
+ }, \
+ { /* _BGP_Personality_DDR_t: */ \
+ _BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \
+ _BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \
+ _BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \
+ _BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \
+ _BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \
+ _BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \
+ _BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \
+ _BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \
+ _BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \
+ _BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \
+ _BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \
+ _BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \
+ _BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \
+ _BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \
+ _BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \
+ _BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \
+ _BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \
+ _BGP_PERS_DDR_TimingTweaks_DEFAULT, /* TimingTweaks */ \
+ _BGP_PERS_Unused_DEFAULT, /* Unused0 */ \
+ _BGP_PERS_Unused_DEFAULT, /* Unused1 */ \
+ _BGP_PERS_Unused_DEFAULT, /* Unused2 */ \
+ _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+ _BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \
+ _BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \
+ _BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \
+ _BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \
+ _BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \
+ }, \
+ { /* _BGP_Personality_Networks_t: */ \
+ 0, /* BlockID */ \
+ 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \
+ 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \
+ 0, /* PSetNum */ \
+ 0, /* PSetSize */ \
+ 0, /* RankInPSet */ \
+ 0, /* IOnodes */ \
+ 0, /* Rank */ \
+ 0, /* IOnodeRank */ \
+ { 0, } /* TreeRoutes[ 16 ] */ \
+ }, \
+ { /* _BGP_Personality_Ethernet_t: */ \
+ 1536, /* mtu */ \
+ { 0, }, /* EmacID[6] */ \
+ { { 0x00,0x00,0x00,0x00, /* IPAddress */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0xFF,0xFF,0xFF,0x70 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPGateway */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* NFSServer */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* serviceNode */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ "", /* NFSExportDir[32] */ \
+ "", /* NFSMountDir[32] */ \
+ { 0x00, } /* SecurityKey[32] */ \
+ }, \
+ 0, /* Block_Config */ \
+ { 0, } /* padd[7] */ \
+ }
+
+
+/* Define a static initializer for default configuration. (DEFAULTS FOR HARDWARE) */
+/* This is used in bootloader:bgp_Personality.c and svc_host:svc_main.c */
+#define _BGP_PERSONALITY_DEFAULT_STATIC_INITIALIZER_FOR_HARDWARE { \
+ 0, /* CRC */ \
+ _BGP_PERSONALITY_VERSION, /* Version */ \
+ (sizeof(_BGP_Personality_t)/sizeof(uint32_t)), /* PersonalitySizeWords */ \
+ { /* _BGP_Personality_Kernel_t: */ \
+ 0, /* UniversalComponentIdentifier */ \
+ _BGP_DEFAULT_FREQ, /* FreqMHz */ \
+ _BGP_PERS_RASPOLICY_DEFAULT, /* RASPolicy */ \
+ _BGP_PERS_PROCESSCONFIG_SMP, /* ProcessConfig */ \
+ _BGP_PERS_TRACE_DEFAULT, /* TraceConfig */ \
+ _BGP_PERS_NODECONFIG_DEFAULT_FOR_HARDWARE, /* NodeConfig */ \
+ _BGP_PERS_L1CONFIG_DEFAULT, /* L1Config */ \
+ _BGP_PERS_L2CONFIG_DEFAULT, /* L2Config */ \
+ _BGP_PERS_L3CONFIG_DEFAULT, /* L3Config */ \
+ _BGP_PERS_L3SELECT_DEFAULT, /* L3Select */ \
+ 0, /* SharedMemMB */ \
+ 0, /* ClockStop0 */ \
+ 0 /* ClockStop1 */ \
+ }, \
+ { /* _BGP_Personality_DDR_t: */ \
+ _BGP_PERS_DDRFLAGS_DEFAULT, /* DDRFlags */ \
+ _BGP_PERS_SRBS0_DEFAULT, /* SRBS0 */ \
+ _BGP_PERS_SRBS1_DEFAULT, /* SRBS1 */ \
+ _BGP_PERS_DDR_PBX0_DEFAULT, /* PBX0 */ \
+ _BGP_PERS_DDR_PBX1_DEFAULT, /* PBX1 */ \
+ _BGP_PERS_DDR_MemConfig0_DEFAULT, /* MemConfig0 */ \
+ _BGP_PERS_DDR_MemConfig1_DEFAULT, /* MemConfig1 */ \
+ _BGP_PERS_DDR_ParmCtl0_DEFAULT, /* ParmCtl0 */ \
+ _BGP_PERS_DDR_ParmCtl1_DEFAULT, /* ParmCtl1 */ \
+ _BGP_PERS_DDR_MiscCtl0_DEFAULT, /* MiscCtl0 */ \
+ _BGP_PERS_DDR_MiscCtl1_DEFAULT, /* MiscCtl1 */ \
+ _BGP_PERS_DDR_CmdBufMode0_DEFAULT, /* CmdBufMode0 */ \
+ _BGP_PERS_DDR_CmdBufMode1_DEFAULT, /* CmdBufMode1 */ \
+ _BGP_PERS_DDR_RefrInterval0_DEFAULT, /* RefrInterval0 */ \
+ _BGP_PERS_DDR_RefrInterval1_DEFAULT, /* RefrInterval1 */ \
+ _BGP_PERS_DDR_ODTCtl0_DEFAULT, /* ODTCtl0 */ \
+ _BGP_PERS_DDR_ODTCtl1_DEFAULT, /* ODTCtl1 */ \
+ _BGP_PERS_DDR_TimingTweaks_DEFAULT, /* TimingTweaks */ \
+ _BGP_PERS_Unused_DEFAULT, /* Unused0 */ \
+ _BGP_PERS_Unused_DEFAULT, /* Unused1 */ \
+ _BGP_PERS_Unused_DEFAULT, /* Unused2 */ \
+ _BGP_PERS_DDR_DataStrobeCalib1_DEFAULT, /* DataStrobeCalib1 */ \
+ _BGP_PERS_DDR_DQSCtl_DEFAULT, /* DQSCtl */ \
+ _BGP_PERS_DDR_Throttle_DEFAULT, /* Throttle */ \
+ _BGP_PERS_DDR_DDRSizeMB_DEFAULT, /* DDRSizeMB */ \
+ _BGP_PERS_DDR_Chips_DEFAULT, /* Chips */ \
+ _BGP_PERS_DDR_CAS_DEFAULT /* CAS */ \
+ }, \
+ { /* _BGP_Personality_Networks_t: */ \
+ 0, /* BlockID */ \
+ 1, 1, 1, /* Xnodes, Ynodes, Znodes */ \
+ 0, 0, 0, /* Xcoord, Ycoord, Zcoord */ \
+ 0, /* PSetNum */ \
+ 0, /* PSetSize */ \
+ 0, /* RankInPSet */ \
+ 0, /* IOnodes */ \
+ 0, /* Rank */ \
+ 0, /* IOnodeRank */ \
+ { 0, } /* TreeRoutes[ 16 ] */ \
+ }, \
+ { /* _BGP_Personality_Ethernet_t: */ \
+ 1536, /* mtu */ \
+ { 0, }, /* EmacID[6] */ \
+ { { 0x00,0x00,0x00,0x00, /* IPAddress */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPNetmask */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0xFF,0xFF,0xFF,0x70 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPBroadcast */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* IPGateway */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* NFSServer */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ { { 0x00,0x00,0x00,0x00, /* serviceNode */ \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00, \
+ 0x00,0x00,0x00,0x00 \
+ } }, \
+ "", /* NFSExportDir[32] */ \
+ "", /* NFSMountDir[32] */ \
+ { 0x00, } /* SecurityKey[32] */ \
+ }, \
+ 0, /* Block_Config */ \
+ { 0, } /* padd[7] */ \
+ }
+
+
+__END_DECLS
+
+
+
+#endif /* Add nothing below this line. */
diff --git a/arch/powerpc/include/common/namespace.h b/arch/powerpc/include/common/namespace.h
new file mode 100644
index 00000000000000..a5ee88e0c836d1
--- /dev/null
+++ b/arch/powerpc/include/common/namespace.h
@@ -0,0 +1,47 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file common/namespace.h
+ */
+
+#ifndef _NAMESPACE_H_ /* Prevent multiple inclusion */
+#define _NAMESPACE_H_
+
+
+
+
+#if !defined(__ASSEMBLY__) && defined(__cplusplus)
+#define __BEGIN_DECLS extern "C" {
+#define __C_LINKAGE extern "C"
+#else
+#define __BEGIN_DECLS
+#define __C_LINKAGE
+#endif
+
+
+#if !defined(__ASSEMBLY__) && defined(__cplusplus)
+#define __END_DECLS }
+#else
+#define __END_DECLS
+#endif
+
+
+
+
+#endif /* Add nothing below this line */
diff --git a/arch/powerpc/include/spi/DMA_Assert.h b/arch/powerpc/include/spi/DMA_Assert.h
new file mode 100644
index 00000000000000..5f21b64b1c8da8
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Assert.h
@@ -0,0 +1,276 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef __DMA_ASSERT_H_ /* Prevent multiple inclusion */
+#define __DMA_ASSERT_H_
+
+#ifndef __LINUX_KERNEL__
+
+/*!
+ * \file spi/DMA_Assert.h
+ *
+ * \brief DMA SPI Assert Macros
+ *
+ * Two sets of assert macros are provided:
+ * - Kernel Asserts
+ * - User-mode Asserts
+ *
+ * When DMA SPIs are used within the kernel, a special assert routine is called
+ * that does NOT abort. It just prints the assertion and the location and
+ * continues.
+ *
+ * When DMA SPIs are used within user-mode code, the normal assert routine is
+ * called, which prints the assertion and location and aborts.
+ *
+ * Several levels of asserts are provided, and #define variables control which
+ * levels are activated. The following assert macros are available:
+ *
+ * SPI_abort - Always active and always issues assert(0).
+ * Primarily used for unimplemented code paths.
+ * Not available in the kernel.
+ * SPI_assert - Active by default, or when ASSERT_PROD is defined.
+ * Meant to flag user errors.
+ * SPI_assert_debug - Active by default. Meant to flag coding
+ * errors before shipping.
+ *
+ * The following #defines control which level of asserts are compiled into
+ * the code. Only one of ASSERT_ABORT, ASSERT_PROD (or nothing) should
+ * be specified.
+ * - ASSERT_ABORT means that the "abort" level is the only level
+ * of asserts that is active. Other levels are turned off.
+ * - ASSERT_PROD means that "abort" and "assert" levels are active.
+ * "assert_debug" is turned off.
+ * - Not specifying ASSERT_ABORT or ASSERT_PROD means that all
+ * levels of asserts ("abort", "assert", "assert_debug") are
+ * active.
+ */
+
+#include <common/namespace.h>
+
+
+__BEGIN_DECLS
+
+
+#include <stdio.h>
+
+/* ============================================================ */
+
+#ifdef __CNK__
+
+/*!
+ * \brief Production-level Kernel Assert.
+ *
+ * This production level of assert will be active during normal production
+ * code execution.
+ *
+ * When in the kernel, just do a printf, but don't exit.
+ */
+#define SPI_assert(x) DMA_KernelAssert(x)
+
+/*!
+ * \brief Debug-level Kernel Assert.
+ *
+ * This debug level of assert will only be active during in-house debugging.
+ *
+ * When in the kernel, just do a printf, but don't exit.
+ */
+#define SPI_assert_debug(x) DMA_KernelAssert(x)
+
+#ifdef NDEBUG
+
+/*!
+ * \brief No Debug Kernel Assert Internal Macro
+ *
+ * This macro is used internally for when asserts are turned off via the NDEBUG
+ * flag. It does nothing.
+ */
+#define DMA_KernelAssert( __assert_test ) ((void)0)
+
+/* ============================================================ */
+
+#else /* not NDEBUG */
+
+/*!
+ * \brief Kernel Assert Internal Function
+ *
+ * This function is called when the kernel determines that it needs to assert.
+ * It prints the assertion that failed and the code location, but does not
+ * abort. The kernel should continue executing.
+ *
+ * \param[in] __assertion Pointer to the assertion string that failed the test
+ * \param[in] __file Pointer to the name of the source file that coded the assert
+ * \param[in] __line Line number within the source file that coded the assert
+ */
+extern inline void __DMA_KernelAssert( const char *__assertion,
+ const char *__file,
+ int __line )
+{
+ printf("Assertion Failed: %s, file %s, line %d.\n",
+ __assertion,
+ __file,
+ __line );
+}
+
+
+/*!
+ * \brief Kernel Assert Internal Macro
+ *
+ * This macro is used internally when asserts are turned on (the NDEBUG flag
+ * is not specified). It tests the assertion. If the assertion is true, it
+ * does nothing. If the assertion is false, it invokes the __DMA_KernelAssert
+ * internal function to print out the assert information.
+ *
+ * \param[in] __assert_test Assert expression to be tested; when it
+ * evaluates to zero (false), the failing assertion text, source file
+ * name, and line number are printed via __DMA_KernelAssert()
+ */
+#define DMA_KernelAssert( __assert_test ) \
+ ((__assert_test) ? ((void)0) : \
+ __DMA_KernelAssert( #__assert_test, __FILE__, __LINE__ ))
+
+
+#endif /* NDEBUG */
+
+/* ============================================================ */
+
+#else /* not __CNK__ */
+
+#include <assert.h>
+
+#ifdef ASSERT_ABORT
+
+/*!
+ * \brief Abort-level Abort Assert
+ *
+ * This macro is defined when the ASSERT_ABORT level of asserts is active.
+ *
+ * This macro will assert(0).
+ *
+ */
+#define SPI_abort() assert(0)
+
+/*!
+ * \brief Abort-level Production Assert
+ *
+ * This macro is defined when the ASSERT_ABORT level of asserts is active.
+ * This macro will not assert. It will simply execute the assert test, but
+ * because abort-level-only asserts are active, it will not assert.
+ *
+ */
+#define SPI_assert(x)
+
+/*!
+ * \brief Abort-level Debug Assert
+ *
+ * This macro is defined when the ASSERT_ABORT level of asserts is active.
+ * This macro will not assert. It expands to nothing, so the assert test
+ * is not even evaluated, because abort-level-only asserts are active.
+ *
+ */
+#define SPI_assert_debug(x)
+
+/* ============================================================ */
+
+#else /* Not ASSERT_ABORT */
+
+#ifdef ASSERT_PROD
+
+/*!
+ * \brief Production-level Abort Assert
+ *
+ * This macro is defined when the ASSERT_PROD level of asserts is active.
+ *
+ * This macro will assert(0).
+ *
+ */
+#define SPI_abort() assert(0)
+
+/*!
+ * \brief Production-level Production Assert
+ *
+ * This macro is defined when the ASSERT_PROD level of asserts is active.
+ *
+ * This macro invokes the standard assert() function with the specified
+ * assert test.
+ */
+#define SPI_assert(x) assert(x)
+
+/*!
+ * \brief Production-level Debug Assert
+ *
+ * This macro is defined when the ASSERT_PROD level of asserts is active.
+ *
+ * This macro will not assert. It expands to nothing, so the assert test
+ * is not even evaluated, because production-level-only asserts are active.
+ */
+#define SPI_assert_debug(x)
+
+/* ============================================================ */
+
+#else /* Not ASSERT_PROD */
+
+/*!
+ * \brief Debug-level Abort Assert
+ *
+ * This macro is defined when all levels of asserts are desired (neither the
+ * ASSERT_ABORT nor ASSERT_PROD level of asserts is active. This is the
+ * default).
+ *
+ * This macro will assert(0).
+ *
+ */
+#define SPI_abort() assert(0)
+
+/*!
+ * \brief Debug-level Production Assert
+ *
+ * This macro is defined when all levels of asserts are desired (neither the
+ * ASSERT_ABORT nor ASSERT_PROD level of asserts is active. This is the
+ * default).
+ *
+ * This macro invokes the standard assert() function with the specified
+ * assert test.
+ */
+#define SPI_assert(x) assert(x)
+
+/*!
+ * \brief Debug-level Debug Assert
+ *
+ * This macro is defined when all levels of asserts are desired (neither the
+ * ASSERT_ABORT nor ASSERT_PROD level of asserts is active. This is the
+ * default).
+ *
+ * This macro invokes the standard assert() function with the specified
+ * assert test.
+ */
+#define SPI_assert_debug(x) assert(x)
+
+#endif
+
+#endif
+
+#endif /* __CNK__ */
+
+
+__END_DECLS
+
+
+#endif /* ! __LINUX_KERNEL__ */
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_Counter.h b/arch/powerpc/include/spi/DMA_Counter.h
new file mode 100644
index 00000000000000..4a46a19ea07829
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Counter.h
@@ -0,0 +1,2986 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef _DMA_COUNTER_H_ /* Prevent multiple inclusion */
+#define _DMA_COUNTER_H_
+
+
+/*!
+ * \file spi/DMA_Counter.h
+ *
+ * \brief DMA SPI Counter Definitions and Inline Functions
+ *
+ * This include file contains inline functions that are used to interface with
+ * BG/P DMA injection and reception counters at the lowest level.
+ * Functions include
+ * - set and get a counter's value and base address
+ * - enable and disable a counter or group of counters
+ * - query whether a counter or group of counters has hit zero
+ * - clear a counter's or group of counters' hit-zero state
+ * - set and get a reception counter's maximum address
+ *
+ * Definitions:
+ * - A counter is a 32-bit value containing the number of bytes being
+ * transferred from/to memory
+ * - Associated with a counter is a base address indicating where the data is
+ * being transferred from/to
+ * - Associated with a reception counter is a max address bounding the DMA
+ * transfer.
+ * - There are injection (iDMA) and reception (rDMA) counters
+ * - There are DMA_NUM_COUNTERS iDMA counters and DMA_NUM_COUNTERS rDMA
+ * counters
+ * - A counter group consists of DMA_NUM_COUNTERS_PER_GROUP counters
+ * - There are DMA_NUM_COUNTER_GROUPS iDMA counter groups and
+ * DMA_NUM_COUNTER_GROUPS rDMA counter groups
+ * - A subgroup consists of DMA_NUM_COUNTERS_PER_SUBGROUP counters. This is
+ * the unit of counter allocation.
+ * - The highest-level counter inlines in this include file work with virtual
+ * addresses. They are converted to physical addresses and placed into the
+ * counter.
+ * - The counter's base and max addresses reside in the DMA memory map (DMA
+ * SRAM). The DMA_CounterHw_t structure, known as the hardware counter
+ * structure maps a single counter in this storage. They are "shadowed" by
+ * these inline functions to a DMA_Counter_t structure in DDR memory,
+ * known as the software counter structure, and their associated virtual
+ * address is also stored in that structure for easy retrieval. The
+ * physical addresses really don't have to reside in this shadow structure,
+ * but it is faster to access them there than from the DMA's SRAM.
+ * - The counter's base and max addresses are stored in the DMA SRAM as
+ * 16B-aligned 4-bit shifted physical addresses. That is, the 36-bit
+ * physical address is right shifted 4 bits, aligning it on a 16B boundary
+ * leaving 32 bits. The following naming conventions are used to store
+ * addresses:
+ * - pa_xxxx: Physical address (32-bit, 16B-aligned 4-bit shifted)
+ * - va_xxxx: Virtual address (32 bits).
+ *
+ * \verbatim Picture of data structures:
+
+ ========DDR MEMORY===================|==========DMA SRAM MEMORY=============
+ ------------------------------ |
+ | DMA_CounterGroup_t | |
+ | | | --------------------------------
+ | status --------------------|-------|---->| DMA_CounterStatus_t |
+ | counter[0..63] | | --------------------------------
+ | ------------------------ | |
+ | | DMA_Counter_t | | | -----------------------------
+ | 0 | (software counter) | | | | DMA_CounterHw_t |
+ | | counter_hw_ptr-------|-|-------|---->| (hardware counter) |
+ | ------------------------ | | -----------------------------
+ | . | |
+ | . | |
+ | . | |
+ | ------------------------ | |
+ | | DMA_Counter_t | | | -----------------------------
+ |63 | (software counter) | | | | DMA_CounterHw_t |
+ | | counter_hw_ptr-------|-|-------|---->| (hardware counter) |
+ | ------------------------ | | -----------------------------
+ | . | |
+ ------------------------------ |
+
+ \endverbatim
+ *
+ * \note Memory consistency/coherency inside these inlines is achieved using
+ * mbar and msync.
+ *
+ * MBAR is used to make sure that all writes to memory issued by the
+ * calling core have been accepted by the memory system before
+ * continuing. This guarantees that writes and reads to/from different
+ * addresses to go in defined order.
+ *
+ * MBAR EXAMPLE 1: When a store is done to DMA SRAM, it may not complete
+ * for a period of time. If a counter value is set, and then an injection
+ * fifo tail pointer is set, DMA may see the tail pointer update and begin
+ * the operation before the counter value has been set. Inserting an mbar
+ * between the setting of the counter and the setting of the tail pointer
+ * guarantees that the counter will be set before the tail pointer is
+ * updated.
+ *
+ * MBAR EXAMPLE 2: A counter hits zero. We process the hit-zero and write
+ * a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero
+ * status (different address). The hit-zero status will still indicate
+ * that it hit zero, even though we have already processed it, unless an
+ * mbar is inserted between clearing the hit-zero and reading the hit-zero
+ * status.
+ *
+ * MBAR PHILOSOPHY: After DMA SRAM is updated in the DMA inline functions,
+ * they always do at least an mbar (possibly an msync instead...see below).
+ *
+ * MSYNC does what mbar does, plus ensures consistency across cores. That
+ * is, it waits for snoops (invalidations of L1 cache) on the other cores
+ * to complete before continuing. This guarantees that all of the cores
+ * will see a consistent view of memory after the msync.
+ *
+ * MSYNC EXAMPLE: When a reception counter has hit zero, we assume the
+ * DMA'd data is available to be read by any core. However, old copies of
+ * that data may still be in the L1 caches. Inserting an msync after
+ * detecting that a counter has hit zero guarantees that the old data has
+ * been removed from the L1 caches.
+ *
+ * MSYNC PHILOSOPHY: After the inline functions detect that a counter has
+ * hit zero, they always do an msync.
+ *
+ * SPECULATIVE EXECUTION OF MSYNC: There are cases where msync is done
+ * conditionally. The CPU will begin execution of both sides of the
+ * condition before the result of the condition has been determined.
+ * Then, it will cancel the execution of one side once the result of the
+ * condition has been determined. This speculation is unwanted when
+ * the first instruction on one side of the condition is msync because
+ * cancelling an msync is similar to executing the complete msync.
+ * To avoid this speculative execution of msync, we call
+ * _bgp_msync_nonspeculative(). This will trick the CPU so it won't begin
+ * the msync until the result of the condition is known.
+ *
+ * CALLER ADVICE: Users of these functions should not need to do
+ * mbar/msync themselves, unless they are doing something like the
+ * following: Read a counter and operate on the result when the counter
+ * hasn't reached zero. The caller will need to perform an msync after
+ * reading the counter in order to ensure that snoops have completed
+ * on all CPUs before operating on the DMA'd data.
+ *
+ */
+
+
+#include <common/namespace.h>
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ * placed in the translation unit (eg. xxx.c)for use within that unit.
+ * The function is not externalized for use by another unit...we want this
+ * so we don't end up with multiple units exporting the same function,
+ * which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+ * \verbatim
+ #ifndef INLINE
+ # define INLINE extern inline
+ #endif
+ INLINE int max(int a, int b) {
+ return a > b ? a : b;
+ }
+ \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+ * \verbatim
+ #define INLINE
+ #include "header.h"
+ \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+#ifndef __LINUX_KERNEL__
+
+#include <errno.h>
+#include <bpcore/ppc450_inlines.h> /* For bgp_msync_nonspeculative() */
+
+#endif /* ! __LINUX_KERNEL__ */
+
+#include <spi/DMA_Assert.h>
+#include <spi/bpcore_interface.h> /* For _BGP_IC_DMA_NFT_G3_HIER_POS*/
+#include <spi/kernel_interface.h> /* For Kernel_Virtual2Physical() */
+
+/* #include <asm/bluegene.h> */
+static inline unsigned bic_hw_to_irq(unsigned group, unsigned gint)
+{
+	/* Each BIC group owns a 32-entry window of IRQ numbers; group g's
+	 * window starts at (g+1)*32.  The group interrupt number is masked
+	 * to its low 5 bits and used as the offset within that window.
+	 */
+	unsigned base   = (group + 1) << 5; /* first IRQ of this group's window */
+	unsigned offset = gint & 0x1f;      /* position within the window      */
+
+	return base | offset;
+}
+
+
+/*
+ * ------------------------------------------------------------------------------
+ * Definitions
+ * ------------------------------------------------------------------------------
+ */
+
+/*!
+ * \brief Number of DMA counter groups
+ *
+ * There are 4 counter groups.
+ *
+ */
+#define DMA_NUM_COUNTER_GROUPS 4
+
+
+/*!
+ * \brief Number of DMA counters in a counter group
+ *
+ * There are 64 counters in a counter group.
+ *
+ */
+#define DMA_NUM_COUNTERS_PER_GROUP 64
+
+
+/*!
+ * \brief Number of DMA counters in a counter subgroup
+ *
+ * There are 8 counters in a counter subgroup.
+ *
+ */
+#define DMA_NUM_COUNTERS_PER_SUBGROUP 8
+
+
+/*!
+ * \brief Number of DMA counter subgroups in a group
+ *
+ * There are 8 subgroups in a counter group.
+ *
+ */
+#define DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP (DMA_NUM_COUNTERS_PER_GROUP / DMA_NUM_COUNTERS_PER_SUBGROUP)
+
+
+/*!
+ * \brief Number of DMA counter subgroups, in total, across all groups
+ *
+ * There are 32 subgroups in total.
+ *
+ */
+#define DMA_NUM_COUNTER_SUBGROUPS (DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP * DMA_NUM_COUNTER_GROUPS)
+
+
+/*!
+ * \brief Total number of DMA counters across all groups
+ *
+ * There are DMA_NUM_COUNTER_GROUPS groups of DMA_NUM_COUNTERS_PER_GROUP
+ * counters each.
+ *
+ */
+#define DMA_NUM_COUNTERS ( DMA_NUM_COUNTER_GROUPS * DMA_NUM_COUNTERS_PER_GROUP)
+
+
+/*!
+ * \brief Initial value for a DMA counter
+ *
+ * This value is somewhat arbitrary, but is chosen to be different from zero,
+ * because zero means the counter has hit zero, and may cause false interrupts.
+ *
+ */
+#define DMA_COUNTER_INIT_VAL 0xFFFFFFFF
+
+
+/*!
+ * \brief Max Number of Cores Per Node
+ *
+ * This is the maximum number of cores that can run on a compute node.
+ */
+#define DMA_MAX_NUM_CORES 4
+
+
+/*!
+ * \brief Returns the word number that the specified counter is in
+ *
+ * \param[in] counter_id The ID of the counter (0 to
+ * DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return The number of the word that the specified counter is in (0 or 1)
+ *
+ * Used as an index in the "enabled", "enable", "disable", "hit_zero", and
+ * "clear_hit_zero" fields of the DMA_CounterStatus_t structure, and
+ * the "permissions" field of the DMA_CounterGroup_t structure.
+ *
+ * The right-shift by 5 divides by 32: counters 0-31 map to word 0,
+ * counters 32-63 to word 1.
+ */
+#define DMA_COUNTER_GROUP_WORD_ID(counter_id) ((counter_id)>>5)
+
+
+/*!
+ * \brief Returns the bit within the word that the specified counter is in
+ *
+ * \param[in] counter_id The ID of the counter (0 to
+ * DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return The bit position within the word that the specified counter is
+ * in (0-31)
+ *
+ * Used with the "enabled", "enable", "disable", "hit_zero", and
+ * "clear_hit_zero" fields of the DMA_CounterStatus_t structure, and
+ * the "permissions" field of the DMA_CounterGroup_t structure.
+ *
+ * Masking with 0x1F takes counter_id modulo 32, i.e. the bit index within
+ * the 32-bit word selected by DMA_COUNTER_GROUP_WORD_ID().
+ */
+#define DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id) ((counter_id) & 0x0000001F)
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Structures
+ * -----------------------------------------------------------------------------
+ */
+
+/*!
+ * \brief Hardware DMA Counter
+ *
+ * This maps a DMA counter as it is in the DMA memory map (DMA SRAM).
+ *
+ */
+typedef struct DMA_CounterHw_t
+{
+  /* All fields are volatile because this structure overlays DMA SRAM
+   * registers: every access must reach the hardware and never be cached
+   * in a CPU register. */
+  volatile unsigned counter;   /*!< RW Value of the counter */
+  volatile unsigned increment; /*!< W Increment the counter by this value */
+  volatile unsigned pa_base;   /*!< RW Base address of the counter, 32 bit
+                                    16B-aligned 4-bit shifted address */
+  volatile unsigned pa_max;    /*!< RW Maximum payload address (rDMA only),
+                                    16B-aligned 4-bit shifted address */
+}
+DMA_CounterHw_t;
+
+
+/*!
+ * \brief DMA Counter Hardware Status structure
+ *
+ * This structure maps the DMA SRAM for a particular group of
+ * DMA_NUM_COUNTERS_PER_GROUP counters.
+ *
+ * This is a common structure between iDMA and rDMA.
+ *
+ * \see DMA_COUNTER_GROUP_WORD_ID
+ * \see DMA_COUNTER_GROUP_WORD_BIT_ID
+ *
+ */
+typedef struct DMA_CounterStatus_t
+{
+  /* Maps the status registers in DMA SRAM for one group of
+   * DMA_NUM_COUNTERS_PER_GROUP counters (2 words x 32 bits = 64 bits).
+   * Index the [2] arrays with DMA_COUNTER_GROUP_WORD_ID() and the bits
+   * with DMA_COUNTER_GROUP_WORD_BIT_ID(). */
+  volatile unsigned enabled[2];        /*!< R bitmask (1 bit/counter):
+                                            Counter is enabled (1=enabled) */
+  volatile unsigned enable[2];         /*!< W bitmask (1 bit/counter):
+                                            Counter enable: writing a 1 to
+                                            bit i enables counter i. This
+                                            changes the corresponding bit
+                                            in enabled. */
+  volatile unsigned disable[2];        /*!< W bitmask (1 bit/counter):
+                                            Counter disable: writing a 1 to
+                                            bit i disables counter i. This
+                                            changes the corresponding bit
+                                            in enabled. */
+  volatile unsigned reserved[2];       /*!< HOLE */
+  volatile unsigned hit_zero[2];       /*!< R bitmask (1 bit/counter):
+                                            Counter hit zero
+                                            (1=counter hit zero) */
+  volatile unsigned clear_hit_zero[2]; /*!< W bitmask (1 bit/counter):
+                                            Clear counter hit zero: writing
+                                            a 1 to bit i clears the
+                                            corresponding bit in hit_zero. */
+  volatile unsigned grp_status;        /*!< R bitmask (1 bit/subgroup):
+                                            bit i is 1 if or-reduce over
+                                            sub-group i of the hit_zero bits
+                                            anded with the enable bits.
+                                            Note this includes info about
+                                            all DMA_NUM_COUNTERS counters,
+                                            not just those in this group. */
+}
+DMA_CounterStatus_t;
+
+
+/*!
+ * \brief Software DMA Counter Structure
+ *
+ * This structure provides a shadow (recent copy) of the hardware counter's
+ * base and max. While accessing the actual hardware DMA counter's base and
+ * max is equivalent, it is slower than accessing them from here.
+ *
+ * Additionally, it stores the corresponding virtual addresses, for easy
+ * retrieval, since the hardware counter does not maintain the virtual
+ * address.
+ *
+ * Finally, it contains a pointer to the corresponding hardware counter in
+ * DMA SRAM.
+ *
+ */
+typedef struct DMA_Counter_t
+{
+  /* Shadow copies live in DDR because reading them here is faster than
+   * reading the same values back from DMA SRAM (see file header). */
+  void *va_base; /*!< Shadow virtual address of the base */
+  unsigned int pa_base; /*!< Shadow physical address of the base.
+                             16B-aligned 4-bit shifted address. */
+  void *va_max; /*!< Shadow virtual address of the max (rDMA only) */
+  unsigned int pa_max; /*!< Shadow physical address of the max (rDMA only)
+                            16B-aligned 4-bit shifted address. */
+  DMA_CounterHw_t *counter_hw_ptr; /*!< Pointer to the hardware counter
+                                        in DMA SRAM */
+}
+ALIGN_L1D_CACHE DMA_Counter_t;
+/*!
+ * \todo Re-think whether we need to align this structure on a L1 cache line boundary
+ *
+ */
+
+
+/*!
+ * \enum DMA_Type_t
+ * \brief DMA type (injection/reception) enum
+ *
+ */
+typedef enum DMA_Type_t
+{
+  /* The numeric values are significant: they are passed to kernel system
+   * calls as raw uint32_t values (see the (uint32_t)type casts in the
+   * wrapper functions below). */
+  DMA_Type_Injection = 0, /*!< Injection type of DMA */
+  DMA_Type_Reception = 1 /*!< Reception type of DMA */
+
+}
+DMA_Type_t;
+
+
+/*!
+ * \brief DMA Counter Group Structure
+ *
+ * This structure defines a DMA Counter Group. It is filled in by the kernel
+ * during the DMA_CounterGroupAllocate system call. It points to a
+ * DMA Counter Status structure, and contains up to DMA_NUM_COUNTERS_PER_GROUP
+ * software DMA Counter structures making up this group.
+ *
+ * It also contains permission bits to use the counters, one bit per counter.
+ * When the permission bit is on, the corresponding counter belongs to this
+ * group and can be used. Otherwise, the counter should not be used as part
+ * of this group. These permission bits are used as follows:
+ * 1. Inline functions will ASSERT when an attempt is made
+ * to use a counter that is not part of this group.
+ * 2. Inline functions will use the permission bits as a mask
+ * to return status information only for the counters allocated
+ * to this group.
+ * Use the DMA_COUNTER_GROUP_WORD_ID and DMA_COUNTER_GROUP_WORD_BIT_ID
+ * macros to locate the appropriate "permitted_counters" bit.
+ *
+ * Allocations are done in subgroups (groups of DMA_NUM_COUNTERS_PER_SUBGROUP
+ * counters). This structure contains a bit mask of the subgroups that belong
+ * to this group.
+ *
+ * \see DMA_COUNTER_GROUP_WORD_ID
+ * \see DMA_COUNTER_GROUP_WORD_BIT_ID
+ *
+ */
+typedef struct DMA_CounterGroup_t
+{
+  /* Filled in by the kernel during DMA_CounterGroupAllocate(); see the
+   * Doxygen block above for the permission-bit conventions. */
+  DMA_CounterStatus_t *status_ptr; /*!< Pointer to counter status
+                                        (in DMA SRAM) */
+  unsigned int permissions[2]; /*!< Bit i is 1 if permitted to use
+                                    counter i, 0 otherwise. One bit
+                                    per counter,
+                                    DMA_NUM_COUNTERS_PER_GROUP
+                                    counters. */
+  unsigned int grp_permissions; /*!< Bit i is 1 if permitted to use
+                                     subgroup i, 0 otherwise. One
+                                     bit per subgroup, 8 subgroups. */
+  unsigned int group_id; /*!< The id of this group (0 to
+                              DMA_NUM_COUNTER_GROUPS-1). */
+  DMA_Type_t type; /*!< The type of the DMA (injection
+                        or reception) */
+  DMA_Counter_t counter[DMA_NUM_COUNTERS_PER_GROUP]; /*!<
+                        Software Counter Structures.
+                        i-th structure's hardware
+                        pointer is non-NULL if
+                        permissions[i]=1, NULL if
+                        permissions[i]=0. */
+}
+DMA_CounterGroup_t;
+
+
+/*!
+ *
+ * \brief Counter Application Segment
+ *
+ * A segment of user-addressible memory.
+ * Each segment consists of a virtual address, physical address, and length
+ * defining a contiguous segment of storage that is accessible from the
+ * application.
+ */
+typedef struct DMA_CounterAppSegment_t
+{
+  /* Note the mixed units: length is in bytes, while pa_base is a
+   * 16B-aligned 4-bit-shifted physical address; byte lengths are
+   * converted with >> 4 before being combined with pa_base (see
+   * DMA_CounterGetMinMaxVa). */
+  unsigned int length; /*!< Length in bytes of the segment */
+  uint32_t va_base; /*!< Virtual address of the segment base */
+  uint32_t pa_base; /*!< Shifted physical address of the segment base */
+  uint32_t va_max; /*!< Virtual address of the last byte of segment */
+} DMA_CounterAppSegment_t;
+
+
+/*!
+ *
+ * \brief Counter Application Segments
+ *
+ * An array of application segments. There are N application segments per core
+ * on a node. Thus there are N * (number of cores on a node) application
+ * segments in this array. The first group of segments in the array correspond
+ * to core 0. The second group, core 1, etc.
+ */
+extern DMA_CounterAppSegment_t *DMA_CounterAppSegmentArray;
+
+
+/*!
+ * \brief Number of application segments for a core
+ *
+ * The number of application segments is the same for all cores.
+ */
+extern uint32_t DMA_CounterNumAppSegments;
+
+
+/*!
+ * \brief The index of the last application segment accessed for a core.
+ */
+extern int DMA_CounterCachedAppSegmentIndex[DMA_MAX_NUM_CORES];
+
+
+/*!
+ * \brief The Minimum 4-bit Shifted Physical Address Accessible From User Mode
+ */
+extern uint32_t DMA_CounterMinPaAccessibleFromUserMode[DMA_MAX_NUM_CORES];
+
+/*!
+ *
+ * \brief Initialize Counter Application Segments
+ *
+ * Initialize the array of application segments and the global pointer to it.
+ * This identifies the memory regions that the application can access.
+ *
+ * Also, initialize the minimum physical address accessible from user mode
+ * for each core.
+ *
+ * \retval 0 Success
+ * \retval errorNumber Failure
+ */
+int DMA_CounterInitAppSegments(void);
+
+
+/*!
+ *
+ * \brief Get Number of Counter Application Segments
+ *
+ * \returns Number of application segments for a core.
+ */
+__INLINE__ uint32_t DMA_CounterGetNumAppSegments( void )
+{
+  /* Simple accessor for the per-core application segment count. */
+  uint32_t numSegments = DMA_CounterNumAppSegments;
+  return numSegments;
+}
+
+
+/*!
+ *
+ * \brief Get Pointer to Counter Application Segments
+ *
+ * \param[in] Core number whose application segments pointer is to be
+ * returned.
+ *
+ * \returns Pointer to application segments
+ */
+__INLINE__ DMA_CounterAppSegment_t * DMA_CounterGetAppSegments( unsigned int coreNum )
+{
+  /* Valid core numbers are 0 .. DMA_MAX_NUM_CORES-1.  The previous check
+   * (coreNum <= DMA_MAX_NUM_CORES) was off by one and would have allowed
+   * coreNum == DMA_MAX_NUM_CORES, indexing one full group of segments past
+   * the end of DMA_CounterAppSegmentArray.  The former (coreNum >= 0) test
+   * is dropped: it is always true for an unsigned type.
+   */
+  SPI_assert ( coreNum < DMA_MAX_NUM_CORES );
+
+  {
+    /* Segments are laid out per core: core 0's group first, then core 1's,
+     * etc.  Compute the index of the first segment owned by this core. */
+    unsigned int index = coreNum * DMA_CounterGetNumAppSegments();
+    return ( & ( DMA_CounterAppSegmentArray [ index ] ) );
+  }
+}
+
+
+/*!
+ *
+ * \brief Get Virtual Addresses for the Min and Max Physical Addresses
+ * for User Space
+ *
+ * Based on information in the DMA_CounterAppSegments array, return the
+ * virtual addresses associated with the min and max physical addresses
+ * allowed for user space.
+ *
+ * \param[out] va_min Pointer to a pointer. Upon return, the pointer is
+ * set to the virtual address associated with the
+ * minimum physical address allowed for user space.
+ * \param[out] va_max Pointer to a pointer. Upon return, the pointer is
+ * set to the virtual address associated with the
+ * maximum physical address allowed for user space.
+ *
+ * If the DMA_CounterNumAppSegments array has not been initialized yet
+ * (it is initialized in DMA_CounterGroupAllocate()), a value of 0 for the
+ * min and 0xFFFFFFFF max is returned.
+ */
+__INLINE__ void DMA_CounterGetMinMaxVa(void ** va_min,
+                                       void ** va_max)
+{
+  /* Determine the core we are running on so the correct application
+   * segments are consulted
+   */
+  unsigned int coreNum = Kernel_PhysicalProcessorID();
+  DMA_CounterAppSegment_t * appSegmentArray = DMA_CounterGetAppSegments(coreNum);
+  uint32_t numAppSegments = DMA_CounterGetNumAppSegments();
+
+  /* NOTE(review): this non-NULL test only catches an uninitialized
+   * DMA_CounterAppSegmentArray when coreNum is 0; for other cores the
+   * pointer arithmetic in DMA_CounterGetAppSegments() yields a non-NULL
+   * value even when the array base is NULL.  Confirm callers only reach
+   * here after DMA_CounterGroupAllocate() has initialized the array.
+   */
+  if ( appSegmentArray )
+  {
+    uint32_t minPaBase=0xFFFFFFFF, maxPa=0;
+    uint32_t segmentPaBase, segmentPaMax;
+    uint32_t i, minIndex=0, maxIndex=0;
+
+    /* Single pass over this core's segments: track the segment with the
+     * smallest base physical address and the one reaching the largest
+     * physical address.  pa_base is a 4-bit-shifted (16-byte unit)
+     * address, so the byte length is converted with >> 4. */
+    for (i=0; i<numAppSegments; i++)
+    {
+      segmentPaBase = appSegmentArray[i].pa_base;
+      if ( segmentPaBase < minPaBase )
+      {
+        minPaBase = segmentPaBase;
+        minIndex = i;
+      }
+
+      segmentPaMax = appSegmentArray[i].pa_base + (appSegmentArray[i].length >> 4);
+      if ( segmentPaMax > maxPa )
+      {
+        maxPa = segmentPaMax;
+        maxIndex = i;
+      }
+    }
+
+    /* Return the virtual addresses belonging to the winning segments. */
+    *va_min = (void*)(appSegmentArray[minIndex].va_base);
+    *va_max = (void*)(appSegmentArray[maxIndex].va_max);
+
+/* printf("coreNum=%d, va_min = 0x%08x, minIndex=%d, va_max = 0x%08x, maxIndex=%d, minPa=0x%08x maxPa=0x%08x\n",coreNum,(unsigned)*va_min, minIndex, (unsigned)*va_max, maxIndex, minPaBase, maxPa); */
+/* fflush(stdout); */
+  }
+  else
+  {
+    /* Array not initialized: return the widest possible range, as
+     * documented above. */
+    *va_min = (void*)0;
+    *va_max = (void*)0xFFFFFFFF;
+  }
+}
+
+
+/*!
+ * \brief Convert a 32-bit virtual address to a 32-bit physical address
+ *
+ * This function is a wrapper around _bgp_Virtual2Physical(), only it combines
+ * its 36-bit output into a 32-bit physical address by right-shifting it 4 bits.
+ * Thus, the physical address returned corresponds to the input virtual address
+ * rounded down to the next lowest 16-byte boundary.
+ *
+ * \param[in] VA 32-bit virtual address to be converted
+ * \param[in] vsize Size in bytes of virtual range
+ * \param[out] pPA Pointer to 32-bit physical address. The output physical
+ * address is returned in the storage pointed to by pPA.
+ *
+ * \retval 0 Successful. The output physical address is in *pPA
+ * \retval -1 Invalid Virtual Address for this process. *pPA unmodified.
+ * \retval -2 The range from VA to (VA+vsize-1) is not physically
+ * contiguous
+ * \retval -3 Virtual Address is in Scratch, but no Scratch, or not enough
+ * Scratch, is enabled. *pPA unmodified.
+ *
+ */
+__INLINE__ int Kernel_VaTo4bitShiftedPa(void *VA,
+                                        size_t vsize,
+                                        uint32_t *pPA )
+{
+  int rc;
+  uint32_t ua_out, pa_out;
+
+  SPI_assert( pPA != NULL );
+
+  /* Translate the virtual range [VA, VA+vsize) to a physical address.
+   * The 36-bit result comes back split: ua_out holds the extended upper
+   * bits, pa_out the lower 32 bits. */
+  rc = Kernel_Virtual2Physical(VA,
+                               vsize,
+                               &ua_out,
+                               &pa_out );
+
+  if ( rc == 0 )
+  {
+    /* Splice the pieces into the 16B-aligned, 4-bit-shifted form:
+     * drop the low 4 bits of pa_out and put the upper nibble from
+     * ua_out in the top 4 bits. */
+    *pPA = (ua_out << 28) | (pa_out >> 4);
+  }
+
+  /* Non-zero rc (see Doxygen above) leaves *pPA unmodified. */
+  return (rc);
+}
+
+
+/*
+ *------------------------------------------------------------------------------
+ *
+ * The following are inline function wrappers around system calls that
+ * operate on DMA counters.
+ *
+ *------------------------------------------------------------------------------
+ */
+
+
+/*!
+ * \brief Query Free DMA Counter Subgroups within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available) subgroups within the specified group.
+ *
+ * \param[in] type Specifies whether this is an injection or
+ * reception counter group (DMA_Type_Injection
+ * or DMA_Type_Reception)
+ * \param[in] grp Group number being queried (0 to
+ * DMA_NUM_COUNTER_GROUPS-1)
+ * \param[out] num_subgroups Pointer to an int where the number of free
+ * subgroups in the specified group is returned
+ * \param[out] subgroups Pointer to an array of num_subgroups ints where
+ * the list of num_subgroups subgroups is returned.
+ * Each int is the subgroup number
+ * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). The
+ * caller must provide space for
+ * DMA_NUM_COUNTERS_PER_SUBGROUP ints, in case the
+ * entire counter group is free.
+ *
+ * \retval 0 Successful. num_subgroups and subgroups array set as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \note The kernel may need to synchronize with other cores performing
+ * allocate or free syscalls.
+ *
+ */
+__INLINE__ int DMA_CounterGroupQueryFree(
+                                         DMA_Type_t type,
+                                         int grp,
+                                         int *num_subgroups,
+                                         int *subgroups
+                                        )
+{
+  /* Thin wrapper: forward directly to the kernel system call, converting
+   * the SPI-level types to the raw uint32_t forms the syscall expects. */
+  int rc = Kernel_CounterGroupQueryFree( (uint32_t)type,
+                                         grp,
+                                         (uint32_t*)num_subgroups,
+                                         (uint32_t*)subgroups );
+  return rc;
+}
+
+
+/*!
+ * \brief Allocate DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that allocates DMA counters
+ * from the specified group. Counters may be allocated in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters. Parameters specify how interrupts,
+ * generated when a counter hits zero, are to be handled. A
+ * DMA_CounterGroup_t structure is returned for use in other inline
+ * functions to operate on the allocated counters.
+ *
+ * \param[in] type Specifies whether this is an injection or
+ * reception counter group (DMA_Type_Injection
+ * or DMA_Type_Reception)
+ * \param[in] grp Group number whose counters are being allocated
+ * (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in] num_subgroups Number of subgroups to be allocated from the group
+ * (1 to DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in] subgroups Pointer to an array of num_subgroups ints where
+ * the list of subgroups to be allocated is provided.
+ * Each int is the subgroup number
+ * (0 to num_subgroups-1).
+ * \param[in] target The core that will receive the interrupt when a
+ * counter in this allocation hits zero
+ * (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in] handler A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * counter in this allocation hits zero. This
+ * function must be coded to take 4 uint32_t
+ * parameters:
+ * - A pointer to storage specific to this
+ * handler. This is the handler_parm
+ * specified on this allocation function.
+ * - Three unint32_t parameters that are not used.
+ * If handler is NULL, hit-zero interrupts will not
+ * be enabled for these counters.
+ * \param[in] handler_parm A pointer to storage that should be passed to the
+ * interrupt handling function (see handler
+ * parameter)
+ * \param[in] interruptGroup A InterruptGroup_t that identifies the
+ * group of interrupts that the counters being
+ * allocated will become part of.
+ * \param[out] cg_ptr Pointer to a structure that is filled in upon
+ * successful return for use in other inline
+ * functions to operate on the allocated counters.
+ * \li counter - Array of software counter
+ * structures. Each element
+ * points to the corresponding
+ * hardware counter in DMA SRAM.
+ * Pointers are null if not
+ * allocated).
+ * Counters are initialized to
+ * DMA_COUNTER_INIT_VAL,
+ * disabled, their hit_zero bit
+ * is off, base and max are NULL.
+ * \li status_ptr - Points to status area within the
+ * DMA memory map.
+ * \li permissions - Bits set for each allocated
+ * counter
+ * \li grp_permissions - Permissions for each
+ * subgroup
+ * \li group_id - The group number
+ * \li type - The type of DMA (injection or
+ * reception)
+ *
+ * \retval 0 Successful. Counters allocated and cg_ptr structure filled in as
+ * described.
+ * \retval -1 Unsuccessful. errno gives the reason. Nothing has been
+ * allocated.
+ *
+ * \note The kernel may need to synchronize with other cores performing queries
+ * or frees.
+ *
+ */
+__INLINE__ int DMA_CounterGroupAllocate(
+                                        DMA_Type_t type,
+                                        int grp,
+                                        int num_subgroups,
+                                        int *subgroups,
+                                        int target,
+                                        Kernel_CommThreadHandler handler,
+                                        void *handler_parm,
+                                        Kernel_InterruptGroup_t interruptGroup,
+                                        DMA_CounterGroup_t *cg_ptr
+                                       )
+{
+  int rc;
+  /*
+   * Initialize the Counter Application Segment array and its global pointer if
+   * it has not been initialized yet.
+   */
+  if ( DMA_CounterAppSegmentArray == NULL )
+  {
+    rc = DMA_CounterInitAppSegments();
+    if (rc) return(rc);
+  }
+
+  /*
+   * If an interrupt handler has been specified, invoke the system call
+   * to configure the kernel to invoke the handler when the hit zero
+   * interrupt fires.
+   */
+
+  if (handler)
+  {
+    /*
+     * Calculate the IRQ to be one of
+     * - 0: inj counter hit zero vector 0
+     * - 1: inj counter hit zero vector 1
+     * - 2: inj counter hit zero vector 2
+     * - 3: inj counter hit zero vector 3
+     *
+     * - 4: rec counter hit zero vector 0
+     * - 5: rec counter hit zero vector 1
+     * - 6: rec counter hit zero vector 2
+     * - 7: rec counter hit zero vector 3
+     * based on the counter type and the DMA group number.
+     */
+    unsigned irqInGroup = (type == DMA_Type_Injection) ? 0 + grp : 4 + grp;
+
+    /*
+     * Calculate an interrupt ID, which is the BIC interrupt group (3)
+     * combined with the IRQ number.
+     */
+/* int interruptID = Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G3_HIER_POS, */
+/* irqInGroup); */
+    int interruptID = bic_hw_to_irq(_BGP_IC_DMA_NFT_G3_HIER_POS,
+                                    irqInGroup);
+
+    /*
+     * Calculate the opcode indicating
+     * - the target core for interrupt
+     * - to call the specified function when the interrupt fires
+     * - to disable interrupts before calling the specified function
+     * - to enable interrupts after calling the specified function
+     */
+    int opcode = ( COMMTHRD_OPCODE_CORE0 + target ) |
+                 COMMTHRD_OPCODE_CALLFUNC |
+                 COMMTHRD_OPCODE_DISABLEINTONENTRY |
+                 COMMTHRD_OPCODE_ENABLEINTONPOOF ;
+
+    /*
+     * Configure this interrupt with the kernel.
+     *
+     * NOTE(review): interruptGroup is forwarded through a (uint32_t*) cast
+     * to match the syscall signature; it is not dereferenced here --
+     * confirm against the Kernel_SetCommThreadConfig interface.
+     */
+    rc = Kernel_SetCommThreadConfig(interruptID,
+                                    opcode,
+                                    (uint32_t*)interruptGroup,
+                                    handler,
+                                    (uint32_t)handler_parm,
+                                    (uint32_t)NULL,
+                                    (uint32_t)NULL,
+                                    (uint32_t)NULL);
+    if (rc) return rc;
+  }
+
+  /*
+   * Invoke the system call to allocate the counters.
+   * This system call also sets up the DMA DCRs to interrupt when the
+   * counters hit zero.
+   *
+   * The handler/handler_parm/interruptGroup arguments are passed as NULL
+   * because interrupt delivery was already configured above via
+   * Kernel_SetCommThreadConfig() when a handler was supplied.
+   */
+  rc = Kernel_CounterGroupAllocate( (uint32_t)type,
+                                    grp,
+                                    num_subgroups,
+                                    (uint32_t*)subgroups,
+                                    target,
+                                    (uint32_t) NULL, /* Handler. Not used */
+                                    (uint32_t*)NULL, /* Handler_parm. Not used */
+                                    (uint32_t) NULL, /* InterruptGroup. Not used */
+                                    (uint32_t*)cg_ptr);
+  return rc;
+}
+
+
+/*!
+ * \brief Free DMA Counters From A Group
+ *
+ * Thin wrapper around the Kernel_CounterGroupFree() system call that releases
+ * DMA counter subgroups (DMA_NUM_COUNTERS_PER_SUBGROUP counters each) back to
+ * the kernel.
+ *
+ * \param[in]  grp            Group number whose counters are being freed
+ *                            (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in]  num_subgroups  Number of subgroups to be freed from the group
+ *                            (1-DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in]  subgroups      Pointer to an array of num_subgroups ints listing
+ *                            the subgroups to free.  Each int is the subgroup
+ *                            number (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1).
+ * \param[out] cg_ptr         Pointer to the structure previously filled in when
+ *                            these counters were allocated.  Upon successful
+ *                            return it is updated to reflect the freed
+ *                            counters:
+ *                            \li counter[]   - pointers to freed counters
+ *                                              nulled.
+ *                            \li permissions - bits cleared for each freed
+ *                                              counter.
+ *
+ * \retval  0  Successful.  Counters freed and cg_ptr structure updated.
+ * \retval -1  Unsuccessful.  errno gives the reason.
+ *
+ * \note The kernel may need to synchronize with other cores performing
+ *       allocates or queries.
+ */
+__INLINE__ int DMA_CounterGroupFree(
+                                    int                 grp,
+                                    int                 num_subgroups,
+                                    int                *subgroups,
+                                    DMA_CounterGroup_t *cg_ptr
+                                   )
+{
+  /* Delegate directly to the kernel; it updates cg_ptr on success. */
+  int rc = Kernel_CounterGroupFree( grp,
+                                    num_subgroups,
+                                    (uint32_t *)subgroups,
+                                    (uint32_t *)cg_ptr );
+  return rc;
+}
+
+
+
+/*!
+ * \brief Enable or Disable Counter Overflow and Underflow Interrupts
+ *
+ * Wrapper around a system call that enables or disables the four counter
+ * overflow/underflow interrupts for all counters:
+ *   1. Injection counter overflow
+ *   2. Injection counter underflow
+ *   3. Reception counter overflow
+ *   4. Reception counter underflow
+ *
+ * \param[in]  enable  Specifies whether to enable or disable the interrupts.
+ *                     0 = Disable, 1 = Enable.
+ *
+ * \retval 0            Successful
+ * \retval error_value  An error value defined in the _BGP_RAS_DMA_ErrCodes
+ *                      enum located in bgp/arch/include/common/bgp_ras.h
+ */
+__INLINE__ int DMA_CounterInterruptControl(unsigned int enable)
+{
+  uint32_t enable_flag = (uint32_t)enable;
+
+  return Kernel_ChgCounterInterruptEnables( enable_flag );
+}
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * The following inline functions operate directly on the Hardware DMA Counter.
+ * Note that MSYNC and MBAR are not performed by these hardware functions...
+ * it is up to the caller to perform them.
+ *------------------------------------------------------------------------------
+ */
+
+
+/*!
+ * \brief Set DMA Hardware Counter Value
+ *
+ * Set a DMA hardware counter's value, given a pointer to the hardware counter.
+ *
+ * \param[in]  c_hw   Pointer to the hardware counter structure
+ * \param[in]  value  The value to be set into the counter
+ *
+ * \return None
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetValueHw(
+                                      DMA_CounterHw_t *c_hw,
+                                      unsigned int     value
+                                     )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Single direct store into the hardware counter's value word.  Memory
+   * ordering (MBAR/MSYNC) is deliberately left to the caller (see note).
+   */
+  c_hw->counter = value;
+}
+
+
+/*!
+ * \brief Set DMA Hardware Counter Base
+ *
+ * Set a DMA hardware counter's base, given a pointer to the hardware counter.
+ *
+ * \param[in]  c_hw     Pointer to the hardware counter structure
+ * \param[in]  pa_base  The base physical address to be associated with the
+ *                      counter.  16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetBaseHw(
+                                     DMA_CounterHw_t *c_hw,
+                                     unsigned int     pa_base
+                                    )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Single direct store of the (already shifted) physical base address. */
+  c_hw->pa_base = pa_base;
+}
+
+
+/*!
+ * \brief Increment DMA Hardware Counter Value
+ *
+ * Increment a DMA hardware counter's value, given a pointer to the hardware
+ * counter.
+ *
+ * \param[in]  c_hw  Pointer to the hardware counter structure
+ * \param[in]  incr  The amount to increment the counter by
+ *
+ * \return None
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterIncrementHw(
+                                       DMA_CounterHw_t *c_hw,
+                                       unsigned int     incr
+                                      )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Writing to the "increment" field adds incr to the counter in hardware
+   * (write-to-increment semantics; see DMA_CounterDecrementHw, which exploits
+   * the same field to decrement via wrap-around).
+   */
+  c_hw->increment = incr;
+}
+
+
+/*!
+ * \brief Decrement DMA Hardware Counter Value
+ *
+ * Decrement a DMA hardware counter's value, given a pointer to the hardware
+ * counter.
+ *
+ * \param[in]  c_hw  Pointer to the hardware counter structure
+ * \param[in]  decr  The amount to decrement the counter by
+ *
+ * \return None
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ * \note  The counter overflow interrupt will fire as a result of this operation.
+ *        Consider disabling this interrupt.
+ *
+ */
+__INLINE__ void DMA_CounterDecrementHw(
+                                       DMA_CounterHw_t *c_hw,
+                                       unsigned int     decr
+                                      )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Decrement the counter by incrementing with a large value, which will
+   * cause the counter to wrap.  (0 - decr) on an unsigned operand is
+   * well-defined modulo 2^32, i.e. the two's-complement of decr.
+   */
+  c_hw->increment = (0 - decr);
+}
+
+
+/*!
+ * \brief Set Reception DMA Hardware Counter Max
+ *
+ * Set a reception DMA hardware counter's maximum payload address, given a
+ * pointer to the hardware counter.
+ *
+ * \param[in]  c_hw    Pointer to the hardware counter structure
+ * \param[in]  pa_max  The max physical address to be associated with the
+ *                     counter.  16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \pre The caller has ASSERTed that (c_hw != NULL)
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetMaxHw(
+                                    DMA_CounterHw_t *c_hw,
+                                    unsigned int     pa_max
+                                   )
+{
+  /* Intentionally no SPI_assert here: the \pre above makes the NULL check
+   * the caller's responsibility.
+   */
+  c_hw->pa_max = pa_max;
+}
+
+
+/*!
+ * \brief Set DMA Hardware Counter Value and Base
+ *
+ * Set a DMA hardware counter's value and base, given a pointer to the hardware
+ * counter.
+ *
+ * \param[in]  c_hw     Pointer to the hardware counter structure
+ * \param[in]  value    The value to be set into the counter
+ * \param[in]  pa_base  The base physical address to be associated with the
+ *                      counter.  16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetValueBaseHw(
+                                          DMA_CounterHw_t *c_hw,
+                                          unsigned int     value,
+                                          unsigned int     pa_base
+                                         )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Two discrete stores (value first, then base); any ordering guarantee
+   * beyond program order is the caller's job via MBAR/MSYNC.
+   */
+  c_hw->counter = value;
+  c_hw->pa_base = pa_base;
+
+}
+
+
+/*!
+ * \brief Set Reception DMA Hardware Counter Value, Base, and Max
+ *
+ * Set a reception DMA hardware counter's value, base, and max, given a pointer
+ * to the hardware counter.
+ *
+ * \param[in]  c_hw     Pointer to the hardware counter structure
+ * \param[in]  value    The value to be set into the counter
+ * \param[in]  pa_base  The base physical address to be associated with the
+ *                      counter.  16B-aligned 4-bit shifted physical address.
+ * \param[in]  pa_max   The max physical address to be associated with the
+ *                      counter.  16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ void DMA_CounterSetValueBaseMaxHw(
+                                             DMA_CounterHw_t *c_hw,
+                                             unsigned int     value,
+                                             unsigned int     pa_base,
+                                             unsigned int     pa_max
+                                            )
+{
+  SPI_assert( c_hw != NULL );
+  /* A max below the base would describe an empty/inverted reception window. */
+  SPI_assert( pa_max >= pa_base);
+
+  c_hw->counter = value;
+  c_hw->pa_base = pa_base;
+  c_hw->pa_max  = pa_max;
+}
+
+
+/*!
+ * \brief Get DMA Hardware Counter Value
+ *
+ * Read a DMA hardware counter's current value, given a pointer to the
+ * hardware counter.
+ *
+ * \param[in]  c_hw  Pointer to the hardware counter structure
+ *
+ * \retval value  The current value of the counter
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetValueHw(
+                                              const DMA_CounterHw_t *c_hw
+                                             )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Plain load of the counter word; no ordering fence here. */
+  return c_hw->counter;
+}
+
+
+/*!
+ * \brief Get DMA Hardware Counter Base
+ *
+ * Read a DMA hardware counter's base, given a pointer to the hardware counter.
+ *
+ * \param[in]  c_hw  Pointer to the hardware counter structure
+ *
+ * \retval pa_base  The base physical address associated with the counter.
+ *                  16B-aligned 4-bit shifted physical address.
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetBaseHw(
+                                             const DMA_CounterHw_t *c_hw
+                                            )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Plain load of the base word; no ordering fence here. */
+  return c_hw->pa_base;
+}
+
+
+/*!
+ * \brief Get Reception DMA Hardware Counter Max
+ *
+ * Read a reception DMA hardware counter's max payload address, given a
+ * pointer to the hardware counter.
+ *
+ * \param[in]  c_hw  Pointer to the hardware counter structure
+ *
+ * \retval pa_max  The max physical address associated with the counter.
+ *                 16B-aligned 4-bit shifted physical address.
+ *
+ * \note  No MSYNC or MBAR is done in this function.  It is the responsibility
+ *        of the caller to do it.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetMaxHw(
+                                            const DMA_CounterHw_t *c_hw
+                                           )
+{
+  SPI_assert( c_hw != NULL );
+
+  /* Plain load of the max word; no ordering fence here. */
+  return c_hw->pa_max;
+}
+
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * The following inline functions operate indirectly on a hardware DMA counter
+ * through the Software DMA Counter structure.
+ *------------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Counter Value
+ *
+ * Set a DMA counter's value, given a pointer to the software DMA counter
+ * structure.
+ *
+ * \param[in]  c_sw   Pointer to the software counter structure
+ * \param[in]  value  The value to be set into the counter
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter so the write has
+ *       been accepted by the memory system before other accesses proceed.
+ */
+__INLINE__ void DMA_CounterSetValue(
+                                    DMA_Counter_t *c_sw,
+                                    unsigned int   value
+                                   )
+{
+  SPI_assert( c_sw != NULL );
+
+  /* Store the value into the underlying hardware counter... */
+  DMA_CounterSetValueHw( c_sw->counter_hw_ptr, value );
+
+  /* ...then fence so the write is accepted by the memory system before
+   * continuing.
+   */
+  _bgp_mbar();
+}
+
+
+/*!
+ * \brief Set DMA Counter Base Address
+ *
+ * Set a DMA counter's base address, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw        Pointer to the software counter structure
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0  Success
+ * \retval -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *             the va_base_in being a bad virtual address.
+ *
+ * \post In the software counter structure, va_base and pa_base are set.
+ *       In the hardware counter structure, pa_base is set.
+ *
+ * \note This function does an MBAR after setting the counter so the writes
+ *       have been accepted by the memory system before other accesses proceed.
+ *
+ * \note The va_base in the software counter structure is va_base_in rounded
+ *       down to the next lowest 16B-aligned address.  The pa_base is the
+ *       4-bit shifted version of va_base.
+ */
+__INLINE__ int DMA_CounterSetBase(
+                                  DMA_Counter_t *c_sw,
+                                  void          *va_base_in
+                                 )
+{
+  SPI_assert( c_sw != NULL );
+
+  /* Align the virtual address down to a 16B boundary and remember it. */
+  c_sw->va_base = (char *)( (unsigned)va_base_in & 0xFFFFFFF0 );
+
+  /* Translate to a 16B-aligned, 4-bit shifted physical address. */
+  if ( Kernel_VaTo4bitShiftedPa( c_sw->va_base,
+                                 1,
+                                 &(c_sw->pa_base) ) != 0 )
+  {
+    errno = EFAULT;
+    return (-1);
+  }
+
+  /* Push the physical base into the hardware counter, then fence so the
+   * write is accepted by the memory system before continuing.
+   */
+  DMA_CounterSetBaseHw( c_sw->counter_hw_ptr, c_sw->pa_base );
+  _bgp_mbar();
+
+  return (0);
+}
+
+
+/*!
+ * \brief Increment DMA Counter
+ *
+ * Increment a DMA counter's value, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ * \param[in]  incr  The amount to increment the counter by
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter so the write has
+ *       been accepted by the memory system before other accesses proceed.
+ */
+__INLINE__ void DMA_CounterIncrement(
+                                     DMA_Counter_t *c_sw,
+                                     unsigned int   incr
+                                    )
+{
+  SPI_assert( c_sw != NULL );
+
+  /* Bump the hardware counter, then fence. */
+  DMA_CounterIncrementHw( c_sw->counter_hw_ptr, incr );
+  _bgp_mbar();
+}
+
+
+/*!
+ * \brief Decrement DMA Counter
+ *
+ * Decrement a DMA counter's value, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ * \param[in]  decr  The amount to decrement the counter by
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter so the write has
+ *       been accepted by the memory system before other accesses proceed.
+ */
+__INLINE__ void DMA_CounterDecrement(
+                                     DMA_Counter_t *c_sw,
+                                     unsigned int   decr
+                                    )
+{
+  SPI_assert( c_sw != NULL );
+
+  /* Decrement the hardware counter (via its wrap-around increment), then
+   * fence.
+   */
+  DMA_CounterDecrementHw( c_sw->counter_hw_ptr, decr );
+  _bgp_mbar();
+}
+
+
+/*!
+ * \brief Set DMA Counter Max Address
+ *
+ * Set a DMA counter's max address, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw       Pointer to the software counter structure
+ * \param[in]  va_max_in  The max virtual address to be associated with the
+ *                        counter.
+ *
+ * \retval  0  Success
+ * \retval -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *             the va_max_in being a bad virtual address.
+ *
+ * \post In the software counter structure, va_max and pa_max are set.
+ *       In the hardware counter structure, pa_max is set.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ * \note The va_max in the software counter structure is the va_max_in rounded
+ *       up to the next highest 16B-aligned address.  The pa_max is the 4-bit
+ *       shifted version of va_max.
+ *
+ * \note NOTE(review): this function rounds va_max UP to the next 16B boundary,
+ *       whereas DMA_CounterSetValueBaseMax aligns its max address DOWN, with a
+ *       comment that rounding up "may be one byte out of range".  Confirm
+ *       which behavior is intended before relying on the boundary case.
+ *
+ */
+__INLINE__ int DMA_CounterSetMax(
+                                 DMA_Counter_t *c_sw,
+                                 void          *va_max_in
+                                )
+{
+  int rc;
+
+  SPI_assert( c_sw != NULL );
+
+  /*
+   * Round up to 16B boundary and 16-B align the virtual address and store
+   * result in software counter structure.
+   */
+  c_sw->va_max = (char*) ( (unsigned)va_max_in & 0xFFFFFFF0 );
+  if ( c_sw->va_max != va_max_in ) c_sw->va_max = (char*)c_sw->va_max + 0x00000010;
+
+  /*
+   * Get the 16B-aligned 4-bit shifted physical address from the virtual address.
+   */
+  rc = Kernel_VaTo4bitShiftedPa(c_sw->va_max,
+                                1,
+                                &(c_sw->pa_max) );
+
+  if ( rc != 0 )
+    {
+      errno = EFAULT;
+      return (-1);
+    }
+
+  /*
+   * Write physical address to the hardware counter
+   */
+  DMA_CounterSetMaxHw(c_sw->counter_hw_ptr,
+                      c_sw->pa_max);
+
+  _bgp_mbar();  /* Make sure these writes have been accepted by the memory */
+                /* system before continuing                                */
+
+  return (0);
+
+}
+
+
+/*!
+ * \brief Set DMA Counter Value and Base Address
+ *
+ * Set a DMA counter's value and base address, given a pointer to the software
+ * counter structure.
+ *
+ * \param[in]  c_sw        Pointer to the software counter structure
+ * \param[in]  value       The value to be set into the counter
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0  Success
+ * \retval -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *             the va_base_in being a bad virtual address.
+ *
+ * \post In the software counter structure, va_base and pa_base are set.
+ *       In the hardware counter structure, pa_base and value are set.
+ *
+ * \note This function does an MBAR after setting the counter so the writes
+ *       have been accepted by the memory system before other accesses proceed.
+ *
+ * \note The va_base in the software counter structure is va_base_in rounded
+ *       down to the next lowest 16B-aligned address.  The pa_base is the
+ *       4-bit shifted version of va_base.
+ */
+__INLINE__ int DMA_CounterSetValueBase(
+                                       DMA_Counter_t *c_sw,
+                                       unsigned int   value,
+                                       void          *va_base_in
+                                      )
+{
+  SPI_assert( c_sw != NULL );
+
+  /* Align the virtual address down to a 16B boundary and remember it. */
+  c_sw->va_base = (char *)( (unsigned)va_base_in & 0xFFFFFFF0 );
+
+  /* Translate to a 16B-aligned, 4-bit shifted physical address. */
+  if ( Kernel_VaTo4bitShiftedPa( c_sw->va_base,
+                                 1,
+                                 &(c_sw->pa_base) ) != 0 )
+  {
+    errno = EFAULT;
+    return (-1);
+  }
+
+  /* Write the value and physical base to the hardware counter, then fence
+   * so the writes are accepted by the memory system before continuing.
+   */
+  DMA_CounterSetValueBaseHw( c_sw->counter_hw_ptr,
+                             value,
+                             c_sw->pa_base );
+  _bgp_mbar();
+
+  return (0);
+}
+
+
+/*!
+ * \brief Set DMA Counter Value, Base Address, and Max Address
+ *
+ * Set a reception DMA counter's value, base address, and max address, given a
+ * pointer to the software counter structure.
+ *
+ * \param[in]  c_sw        Pointer to the software counter structure
+ * \param[in]  value       The value to be set into the counter
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ * \param[in]  va_max_in   The max virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0  Success
+ * \retval -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *             the va_base_in or va_max_in being a bad virtual address.
+ *
+ * \post In the software counter structure, va_base, pa_base, va_max, and pa_max
+ *       are set.  In the hardware counter structure, pa_base, pa_max, and value
+ *       are set.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ *       writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+ *
+ * \note The va_base in the software counter structure is the va_base_in rounded
+ *       down to the next lowest 16B-aligned address.  The pa_base is the 4-bit
+ *       shifted version of va_base.
+ *
+ * \note The va_max is aligned DOWN to a 16B boundary (rounding up could move
+ *       the address one byte out of range).  The pa_max is the 4-bit shifted
+ *       version of va_max.
+ *
+ */
+__INLINE__ int DMA_CounterSetValueBaseMax(
+                                          DMA_Counter_t *c_sw,
+                                          unsigned int   value,
+                                          void          *va_base_in,
+                                          void          *va_max_in
+                                         )
+{
+  int          rc = 0;
+  void        *va_base, *va_max;
+  unsigned int pa_base, pa_max;
+
+  SPI_assert( c_sw != NULL );
+
+  /*
+   * Process the base address:
+   * - 16B-align the virtual address (round down) and store the result in the
+   *   software counter structure.
+   * - Get the 16B-aligned 4-bit shifted physical address from the virtual
+   *   address.
+   */
+  va_base = c_sw->va_base = (char*) ( (unsigned)va_base_in & 0xFFFFFFF0 );
+
+  rc = Kernel_VaTo4bitShiftedPa(va_base,
+                                1,
+                                &pa_base );
+  if ( rc != 0 )
+    {
+      errno = EFAULT;
+      return (-1);
+    }
+
+  c_sw->pa_base = pa_base;
+
+  /*
+   * Process the max address:
+   * - 16B align the virtual address and store result in software counter
+   *   structure.  (Bug fix: previously only the local copy was set, leaving
+   *   c_sw->va_max stale despite the documented \post.)
+   *   Note: we can't round up or the address may be one byte out of range.
+   * - Get the 16B-aligned 4-bit shifted physical address from the virtual
+   *   address.
+   */
+  va_max = c_sw->va_max = (char*) ( (unsigned)va_max_in & 0xFFFFFFF0 );
+
+  rc = Kernel_VaTo4bitShiftedPa(va_max,
+                                1,
+                                &pa_max );
+  if ( rc != 0 )
+    {
+      errno = EFAULT;
+      return (-1);
+    }
+
+  c_sw->pa_max = pa_max;
+
+  /*
+   * Write the value, base, and max to the hardware counter, then MBAR so the
+   * stores are accepted by the memory system before continuing.
+   */
+  DMA_CounterSetValueBaseMaxHw(c_sw->counter_hw_ptr,
+                               value,
+                               pa_base,
+                               pa_max);
+
+  _bgp_mbar();
+
+  return (0);
+}
+
+
+/*!
+ * \brief Get DMA Counter Value
+ *
+ * Get a DMA counter's value, given a pointer to the software counter
+ * structure.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ *
+ * \retval value  The value of the specified counter
+ *
+ * \note This function does an MSYNC after fetching the counter's value
+ *       to ensure that the data that was just DMA'd is available to all
+ *       cores.
+ */
+__INLINE__ unsigned int DMA_CounterGetValue(
+                                            const DMA_Counter_t *c_sw
+                                           )
+{
+  unsigned int current;
+
+  SPI_assert( c_sw != NULL );
+
+  current = DMA_CounterGetValueHw( c_sw->counter_hw_ptr );
+
+  /* MSYNC so data just DMA'd is visible to all cores before we return. */
+  _bgp_msync();
+
+  return current;
+}
+
+
+/*!
+ * \brief Get DMA Counter Value with No Msync
+ *
+ * Get a DMA counter's value, given a pointer to the software counter
+ * structure.  No MSYNC is done.  It is up to the caller to do it, if
+ * necessary.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ *
+ * \retval value  The value of the specified counter
+ */
+__INLINE__ unsigned int DMA_CounterGetValueNoMsync(
+                                                   const DMA_Counter_t *c_sw
+                                                  )
+{
+  SPI_assert( c_sw != NULL );
+
+  /* Raw read; caller supplies any required MSYNC. */
+  return DMA_CounterGetValueHw( c_sw->counter_hw_ptr );
+}
+
+
+/*!
+ * \brief Get DMA Base Address
+ *
+ * Get a DMA counter's base virtual address, given a pointer to the software
+ * counter structure.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ *
+ * \retval va_base  The base virtual address associated with the specified
+ *                  counter
+ *
+ * \note This returns the shadow va_base directly out of the software counter
+ *       structure.  It should correspond with the physical address in the
+ *       hardware counter, but it is a rounded-down-to-the-previous-16B-boundary
+ *       version of the actual base virtual address of the caller's buffer.
+ */
+__INLINE__ void * DMA_CounterGetBase(
+                                     const DMA_Counter_t *c_sw
+                                    )
+{
+  SPI_assert( c_sw != NULL );
+
+  /* Shadow copy only; no hardware access. */
+  return c_sw->va_base;
+}
+
+
+/*!
+ * \brief Get Reception DMA Max Address
+ *
+ * Get a reception DMA counter's max virtual address, given a pointer to
+ * the software counter structure.
+ *
+ * \param[in]  c_sw  Pointer to the software counter structure
+ *
+ * \retval va_max  The max virtual address associated with the specified
+ *                 counter
+ *
+ * \note This returns the shadow va_max directly out of the software counter
+ *       structure.  It should correspond with the physical address in the
+ *       hardware counter, but it is a 16B-boundary-aligned version of the
+ *       actual max virtual address of the caller's buffer.
+ */
+__INLINE__ void *DMA_CounterGetMax(
+                                   const DMA_Counter_t *c_sw
+                                  )
+{
+  SPI_assert( c_sw != NULL );
+
+  /* Shadow copy only; no hardware access. */
+  return c_sw->va_max;
+}
+
+
+/*!
+ * \brief Get Offset from DMA Base Address
+ *
+ * Given a virtual address, get the offset from the base address associated with
+ * a counter.
+ *
+ * \param[in]  c_sw     Pointer to the software counter structure
+ * \param[in]  va       Virtual address whose offset from the counter's base is
+ *                      to be returned.
+ * \param[in]  length   The number of bytes in the buffer pointed to by va.
+ * \param[in]  coreNum  The number of the core in which the virtual
+ *                      address resides (0 to DMA_MAX_NUM_CORES).
+ *
+ * \retval offset  The offset of the va from the counter's base.
+ *
+ * \note This uses the counter's physical base address and the application's
+ *       memory segments (see DMA_CounterAppSegment_t).
+ *
+ * \note It is assumed that if the coreNum is not our core, then the counter's
+ *       base address (used in calculating the offset) is the smallest physical
+ *       address accessible from user space on coreNum
+ *       (DMA_CounterMinPaAccessibleFromUserMode[coreNum]).
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetOffsetFromBase(
+                                                     const DMA_Counter_t *c_sw,
+                                                     void                *va,
+                                                     unsigned int         length,
+                                                     unsigned int         coreNum
+                                                    )
+{
+  SPI_assert( c_sw != NULL );
+  SPI_assert( va   != NULL );
+  /* coreNum is unsigned, so the former "coreNum >= 0" assert was a tautology
+   * and has been removed; only the upper bound is meaningful.
+   * NOTE(review): the bound is "<=", matching the doxygen range
+   * "(0 to DMA_MAX_NUM_CORES)"; confirm it should not be "<".
+   */
+  SPI_assert ( coreNum <= DMA_MAX_NUM_CORES );
+  {
+    DMA_CounterAppSegment_t *appSegmentArray = DMA_CounterGetAppSegments( coreNum );
+    uint32_t numAppSegments;
+    uint32_t i;
+    uint32_t segmentVaBase;
+    uint32_t offset;
+    uint32_t ourCoreNum = Kernel_PhysicalProcessorID();
+    uint32_t counterPaBase;
+
+
+    /* Determine which application segment the virtual address is in.  */
+    /* First, check the last app segment accessed (cached index).      */
+    i = DMA_CounterCachedAppSegmentIndex[coreNum];
+    segmentVaBase = appSegmentArray[i].va_base;
+    if ( ! ( ((uint32_t)va >= segmentVaBase) &&
+             ((uint32_t)va - segmentVaBase < appSegmentArray[i].length) ) )
+      {
+        /* It is not the last app segment accessed.  Search them. */
+        numAppSegments = DMA_CounterGetNumAppSegments();
+        for (i=0; i<numAppSegments; i++)
+          {
+            segmentVaBase = appSegmentArray[i].va_base;
+            if ( ((uint32_t)va >= segmentVaBase) &&
+                 ((uint32_t)va - segmentVaBase < appSegmentArray[i].length) )
+              break;
+          }
+        SPI_assert(i < numAppSegments);
+        DMA_CounterCachedAppSegmentIndex[coreNum] = i;
+      }
+
+    /*
+     * Make sure buffer fits in app segment.
+     */
+    if ( ( (uint32_t)va + length - 1 ) > appSegmentArray[i].va_max )
+      {
+        printf("DMA_CounterGetOffsetFromBase: Buffer 0x%08x of length %d is out of bounds. Check length.\n",
+               (unsigned)va, length);
+        SPI_assert(0);
+      }
+
+    /*
+     * If coreNum is our core, use the offset from our core's counter base to
+     * calculate the DMA offset.
+     * Otherwise, assume the counter base is the smallest physical address
+     * accessible from user space on coreNum and use that.
+     */
+    if ( ourCoreNum == coreNum )
+      counterPaBase = c_sw->pa_base;
+    else
+      counterPaBase = DMA_CounterMinPaAccessibleFromUserMode[coreNum];
+
+    /*
+     * If the base physical address of the application segment found above is
+     * greater than or equal to the counter's base physical address (typical
+     * case), proceed with the calculation based on that.
+     *
+     * Otherwise, use a slightly different calculation (see else leg).
+     */
+    if ( appSegmentArray[i].pa_base >= counterPaBase )
+      {
+        /*
+         * Calculate the offset from the counter base:
+         * - offset from app segment's virtual address base (va - segmentVaBase) +
+         * - segment's physical base (shifted) - counter's base (shifted) * 16
+         */
+        offset =
+          ((unsigned)va - segmentVaBase) +
+          ( (appSegmentArray[i].pa_base - counterPaBase) << 4 );
+      }
+    /*
+     * Handle the case where the counter's base exceeds the app segment's base.
+     * This occurs when the counter's base is set to the base of the buffer
+     * rather than the min base of all the app segments.
+     */
+    else
+      {
+        offset =
+          ((unsigned)va - segmentVaBase) -
+          ( (counterPaBase - appSegmentArray[i].pa_base) << 4 );
+      }
+
+    return ( offset );
+  }
+}
+
+
+
+
+/*
+ * ------------------------------------------------------------------------------
+ *
+ * The following functions access counters by specifying the group pointer and
+ * counter_id.
+ *
+ * ------------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Counter Value using a Counter ID
+ *
+ * Set a DMA counter's value, given a counter group structure and counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated
+ * \param[in]  counter_id  Identifier of the counter being set
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  value       The value to be set into the counter
+ *
+ * \return None
+ *
+ * \note The delegated call performs an MBAR after setting the counter so the
+ *       write is accepted by the memory system before other accesses proceed.
+ */
+__INLINE__ void DMA_CounterSetValueById(
+                                        DMA_CounterGroup_t *cg_ptr,
+                                        int                 counter_id,
+                                        unsigned int        value
+                                       )
+{
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  /* Caller must hold the allocation permission bit for this counter. */
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+               _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  /* Delegate to the software-counter routine, which performs the MBAR. */
+  DMA_CounterSetValue( &cg_ptr->counter[counter_id], value );
+}
+
+
+/*!
+ * \brief Set DMA Counter Base Address using a Counter ID
+ *
+ * Set a DMA counter's base address, given a counter group structure and
+ * counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being set
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  va_base_in  The base virtual address to be associated with the
+ *                         counter.
+ *
+ * \retval  0  Success
+ * \retval -1  Failure.  errno contains the reason.  Most likely EFAULT due to
+ *             the va_base_in being a bad virtual address.
+ *
+ * \note The delegated call performs an MBAR after setting the counter so the
+ *       writes are accepted by the memory system before other accesses proceed.
+ */
+__INLINE__ int DMA_CounterSetBaseById(
+                                      DMA_CounterGroup_t *cg_ptr,
+                                      int                 counter_id,
+                                      void               *va_base_in
+                                     )
+{
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  /* Caller must hold the allocation permission bit for this counter. */
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+               _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  /* Delegate to the software-counter routine, which performs the MBAR. */
+  return DMA_CounterSetBase( &cg_ptr->counter[counter_id], va_base_in );
+}
+
+
+/*!
+ * \brief Increment DMA Counter using a Counter ID
+ *
+ * Increment a DMA counter's value, given a counter group structure and
+ * counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being incremented
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  incr        The amount to increment the counter by
+ *
+ * \return None
+ *
+ * \note The delegated call performs an MBAR after setting the counter so the
+ *       write is accepted by the memory system before other accesses proceed.
+ */
+__INLINE__ void DMA_CounterIncrementById(
+                                         DMA_CounterGroup_t *cg_ptr,
+                                         int                 counter_id,
+                                         unsigned int        incr
+                                        )
+{
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  /* Caller must hold the allocation permission bit for this counter. */
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+               _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  /* Delegate to the software-counter routine, which performs the MBAR. */
+  DMA_CounterIncrement( &cg_ptr->counter[counter_id], incr );
+}
+
+
+/*!
+ * \brief Decrement DMA Counter using a Counter ID
+ *
+ * Decrement a DMA counter's value, given a counter group structure and
+ * counter ID.
+ *
+ * \param[in]  cg_ptr      Pointer to the structure previously filled in when
+ *                         the counter was allocated.
+ * \param[in]  counter_id  Identifier of the counter being decremented
+ *                         (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in]  decr        The amount to decrement the counter by
+ *
+ * \return None
+ *
+ * \note The delegated call performs an MBAR after setting the counter so the
+ *       write is accepted by the memory system before other accesses proceed.
+ */
+__INLINE__ void DMA_CounterDecrementById(
+                                         DMA_CounterGroup_t *cg_ptr,
+                                         int                 counter_id,
+                                         unsigned int        decr
+                                        )
+{
+  SPI_assert( cg_ptr != NULL );
+  SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+  /* Caller must hold the allocation permission bit for this counter. */
+  SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+               _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+  /* Delegate to the software-counter routine, which performs the MBAR. */
+  DMA_CounterDecrement( &cg_ptr->counter[counter_id], decr );
+}
+
+
+/*!
+ * \brief Set Reception DMA Counter Max Address using a Counter ID
+ *
+ * Set a reception DMA counter's max address, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when
+ * the counter was allocated.
+ * \param[in] counter_id Identifier of the counter being set
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in] va_max_in The max virtual address to be associated with the
+ * counter.
+ *
+ * \retval 0 Success
+ * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to
+ * the va_max_in being a bad virtual address.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ int DMA_CounterSetMaxById(
+ DMA_CounterGroup_t *cg_ptr,
+ int counter_id,
+ void *va_max_in
+ )
+{
+ int rc;
+
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+ rc = DMA_CounterSetMax( &cg_ptr->counter[counter_id],
+ va_max_in );
+
+ /* Note: it is assumed that the above function call performs an MBAR */
+
+ return rc;
+
+}
+
+
+/*!
+ * \brief Get DMA Counter Value using a Counter ID
+ *
+ * Get a DMA counter's value, given a counter group structure and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when
+ * the counter was allocated.
+ * \param[in] counter_id Identifier of the counter
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval value The value of the counter
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetValueById(
+ const DMA_CounterGroup_t *cg_ptr,
+ const int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+ return ( DMA_CounterGetValue( &cg_ptr->counter[counter_id] ) );
+}
+
+
+/*!
+ * \brief Get DMA Counter Base Address using a Counter ID
+ *
+ * Get a DMA counter's base virtual address, given a counter group structure and
+ * counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval va_base The base virtual address associated with the specified
+ * counter
+ *
+ * \note This returns the shadow va_base directly out of the software counter
+ * structure. This should correspond with the physical address in the
+ * hardware counter, but it is a rounded-down-to-the-previous-16B-boundary
+ * version of the actual base virtual address of the buffer the caller is
+ * working with.
+ *
+ */
+__INLINE__ void * DMA_CounterGetBaseById(
+ const DMA_CounterGroup_t *cg_ptr,
+ int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+ return( DMA_CounterGetBase( &cg_ptr->counter[counter_id] ) );
+}
+
+
+/*!
+ * \brief Get Offset from DMA Base Address using a Counter ID
+ *
+ * Given a virtual address, get the offset from the base address associated with
+ * the specified counter.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when
+ * the counter was allocated
+ * \param[in] counter_id Identifier of the counter
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in] va Virtual address whose offset from the counter's base
+ * is to be returned.
+ * \param[in] length The number of bytes in the buffer pointed to by va.
+ * \param[in] coreNum The number of the core in which the virtual
+ * address resides (0 to DMA_MAX_NUM_CORES).
+ *
+ * \retval offset The offset of the va from the counter's base.
+ *
+ * \note This works with the shadow va_base directly out of the software counter
+ * structure. This should correspond with the physical address in the
+ * hardware counter, but it is a rounded-down-to-the-previous-16B-boundary
+ * version of the actual base virtual address of the buffer the caller is
+ * working with.
+ *
+ * \note No effort is given to flag the case where va is less than the base
+ * address. In that case, (va - va_base) is returned, whatever that is.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetOffsetFromBaseById(
+ const DMA_CounterGroup_t *cg_ptr,
+ int counter_id,
+ void *va,
+ unsigned int length,
+ unsigned int coreNum
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+/* printf("Getting offset from counter %d for core %d\n",counter_id,coreNum); */
+ return( DMA_CounterGetOffsetFromBase( &cg_ptr->counter[counter_id],
+ va,
+ length,
+ coreNum ) );
+}
+
+
+/*!
+ * \brief Get Reception DMA Counter Max Address Using a Counter ID
+ *
+ * Get a reception DMA counter's maximum virtual address, given a counter group
+ * structure and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval va_max The virtual address of the max of the counter
+ *
+ * \note This returns the shadow va_max directly out of the software counter
+ * structure. This should correspond with the physical address in the
+ * hardware counter, but it is a rounded-up-to-the-next-16B-boundary
+ * version of the actual max virtual address of the buffer the caller is
+ * working with.
+ *
+ */
+__INLINE__ void * DMA_CounterGetMaxById(
+ const DMA_CounterGroup_t *cg_ptr,
+ const int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+ return ( DMA_CounterGetMax( &cg_ptr->counter[counter_id] ) );
+}
+
+
+/*!
+ * \brief Set DMA Counter Value and Base Address using a Counter ID
+ *
+ * Set a DMA counter's value and base address, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter being set
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in] value The value to be set into the counter
+ * \param[in] va_base_in The base virtual address to be associated with the
+ * counter.
+ *
+ * \retval 0 Success
+ * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to
+ * the va_base_in being a bad virtual address.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ int DMA_CounterSetValueBaseById(
+ DMA_CounterGroup_t *cg_ptr,
+ int counter_id,
+ unsigned int value,
+ void *va_base_in
+ )
+{
+ int rc;
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+ rc = DMA_CounterSetValueBase( &cg_ptr->counter[counter_id],
+ value,
+ va_base_in );
+
+ /* Note: it is assumed that the above function call performs an MBAR */
+
+ return rc;
+}
+
+
+
+
+/*!
+ * \brief Set Reception DMA Counter Value, Base Address, and Max Address using
+ * a Counter ID
+ *
+ * Set a reception DMA counter's value, base address, and max address, given a
+ * counter group structure and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter being set
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ * \param[in] value The value to be set into the counter
+ * \param[in] va_base_in The base virtual address to be associated with the
+ * counter.
+ * \param[in] va_max_in The max virtual address to be associated with the
+ * counter.
+ *
+ * \retval 0 Success
+ * \retval -1 Failure. errno contains the reason. Most likely EFAULT due to
+ * the va_base_in or va_max_in being a bad virtual address.
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ int DMA_CounterSetValueBaseMaxById(
+ DMA_CounterGroup_t *cg_ptr,
+ int counter_id,
+ unsigned int value,
+ void *va_base_in,
+ void *va_max_in
+ )
+{
+ int rc;
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+ rc = DMA_CounterSetValueBaseMax( &cg_ptr->counter[counter_id],
+ value,
+ va_base_in,
+ va_max_in );
+
+ /* Note: it is assumed that the above function call performs an MBAR */
+
+ return rc;
+
+}
+
+
+/*!
+ * \brief Enable DMA Counter using a Counter ID
+ *
+ * Enable a DMA counter, given a counter group structure and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter being enabled
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetEnableById(
+ DMA_CounterGroup_t *cg_ptr,
+ int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ {
+ /* Enable the counter by writing 1 to the appropriate bit */
+ int r = DMA_COUNTER_GROUP_WORD_ID(counter_id);
+ int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+ cg_ptr->status_ptr->enable[r] = _BN(c);
+
+ _bgp_mbar(); /* Make sure these writes have been accepted by the memory */
+ /* system before continuing */
+ }
+}
+
+
+/*!
+ * \brief Disable DMA Counter using a Counter ID
+ *
+ * Disable a DMA counter, given a counter group structure and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter being disabled
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetDisableById(
+ DMA_CounterGroup_t *cg_ptr,
+ int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ {
+ /* Disable the counter by writing 1 to the appropriate bit */
+ int r = DMA_COUNTER_GROUP_WORD_ID(counter_id);
+ int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+ cg_ptr->status_ptr->disable[r] = _BN(c);
+
+ _bgp_mbar(); /* Make sure these writes have been accepted by the memory */
+ /* system before continuing */
+ }
+}
+
+
+/*!
+ * \brief Determine Whether a DMA Counter is Enabled using a Counter ID
+ *
+ * Determine whether a DMA counter is enabled, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter being queried
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval 0 The counter is disabled
+ * \retval 1 The counter is enabled
+ *
+ */
+__INLINE__ int DMA_CounterGetEnabledById(
+ const DMA_CounterGroup_t *cg_ptr,
+ int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ {
+ /* Return 0 or 1 if counter is disabled/enabled */
+ int r = DMA_COUNTER_GROUP_WORD_ID(counter_id);
+ int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+ if ( ( cg_ptr->status_ptr->enabled[r] & _BN(c) ) == 0 ) {return 0;}
+ else { return 1;}
+ }
+}
+
+
+/*!
+ * \brief Determine Whether a DMA Counter Has Hit Zero using a Counter ID
+ *
+ * Determine whether a DMA counter has hit zero, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter being queried
+ * (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \retval 0 The counter has not hit zero
+ * \retval 1 The counter has hit zero
+ *
+ * \note This function does an MSYNC after determining that the counter has hit
+ * zero to ensure that the data that was just DMA'd is available to all
+ * cores. The msync is only done if this is a reception counter group,
+ * since there is nothing to sync for injection counters that have hit zero.
+ *
+ */
+__INLINE__ int DMA_CounterGetHitZeroById(
+ const DMA_CounterGroup_t *cg_ptr,
+ int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ {
+ /* Return 0 or 1 if counter has hit zero */
+ int r = DMA_COUNTER_GROUP_WORD_ID(counter_id);
+ int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+ if ( ( cg_ptr->status_ptr->hit_zero[r] & _BN(c) ) == 0 ) {return 0;}
+ else {
+ /* By convention, we assume that if counter has hit zero, then it will be
+ * used. This requires an msync to ensure snoops from the DMA arbiter
+ * have hit the cores. That is, the data that was just DMA'd is available
+ * to all cores.
+ *
+ * Furthermore, If we just put a _bgp_msync() here, it could get
+ * speculatively executed and withdrawn even if the counter hasn't hit zero,
+ * so we call a special version of this function that ensures the speculation
+ * does not occur.
+ *
+ * It only needs to be done for reception counters since there is nothing
+ * to sync when sending data.
+ */
+ if ( cg_ptr->type == DMA_Type_Reception ) _bgp_msync_nonspeculative();
+ return 1;
+ }
+ }
+}
+
+
+/*!
+ * \brief Clear a DMA Counter's Hit Zero Status using a Counter ID
+ *
+ * Clear a DMA counter's "hit zero" status, given a counter group structure
+ * and counter ID.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] counter_id Identifier of the counter whose "hit zero" status is
+ * being cleared (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterClearHitZeroById(
+ DMA_CounterGroup_t *cg_ptr,
+ int counter_id
+ )
+{
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ {
+ /* Clear the hit zero bit of a counter */
+ int r = DMA_COUNTER_GROUP_WORD_ID(counter_id);
+ int c = DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id);
+ cg_ptr->status_ptr->clear_hit_zero[r] = _BN(c) ;
+
+ _bgp_mbar(); /* Make sure these writes have been accepted by the memory */
+ /* system before continuing */
+ }
+}
+
+
+/*
+ * ------------------------------------------------------------------------------
+ *
+ * The following functions manipulate or get the status of multiple counters
+ *
+ * ------------------------------------------------------------------------------
+ */
+
+
+/*!
+ * \brief Enable Multiple DMA Counters
+ *
+ * Enable multiple DMA counters, given a counter group structure and mask.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] reg Identifies the "word" (0 or 1) of the counters
+ * being manipulated. This is the index into the
+ * enable array.
+ * \param[in] counterBits Identifies which counters in the "word" are being
+ * manipulated.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetEnable(
+ DMA_CounterGroup_t *cg_ptr,
+ int reg,
+ unsigned int counterBits
+ )
+{
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) );
+ SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ cg_ptr->status_ptr->enable[reg] = counterBits;
+
+ _bgp_mbar(); /* Make sure these writes have been accepted by the memory */
+ /* system before continuing */
+}
+
+
+/*!
+ * \brief Disable Multiple DMA Counters
+ *
+ * Disable multiple DMA counters, given a counter group structure and mask.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] reg Identifies the "word" (0 or 1) of the counters
+ * being manipulated. This is the index into the
+ * disable array.
+ * \param[in] counterBits Identifies which counters in the "word" are being
+ * manipulated.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterSetDisable(DMA_CounterGroup_t *cg_ptr,
+ int reg,
+ unsigned int counterBits
+ )
+{
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) );
+ SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ cg_ptr->status_ptr->disable[reg] = counterBits;
+
+ _bgp_mbar(); /* Make sure these writes have been accepted by the memory */
+ /* system before continuing */
+}
+
+
+/*!
+ * \brief Get Enabled DMA Counters
+ *
+ * Get the enabled status of DMA counters, given a counter group structure
+ * and "word".
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] reg Identifies the "word" (0 or 1) of the counters
+ * being queried. This is the index into the
+ * enabled array.
+ *
+ * \return 32 bit mask indicating which counters in the specified word are enabled.
+ * Only the counters that the caller has allocated will have their status
+ * returned. The status for other counters will be 0.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetEnabled(
+ const DMA_CounterGroup_t *cg_ptr,
+ int reg
+ )
+{
+ SPI_assert( ( ( cg_ptr != NULL ) &&
+ ( ( reg == 0 ) || ( reg == 1 ) ) ) );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ return (cg_ptr->permissions[reg] & cg_ptr->status_ptr->enabled[reg]);
+}
+
+
+/*!
+ * \brief Get Hit Zero Status of DMA Counters
+ *
+ * Get the "hit zero" status of DMA counters, given a counter group structure
+ * and "word".
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] reg Identifies the "word" (0 or 1) of the counters
+ * being queried. This is the index into the
+ * hit zero array.
+ *
+ * \return 32 bit mask indicating which counters in the specified word hit zero.
+ * Only the counters that the caller has allocated will have their status
+ * returned. The status for other counters will be 0.
+ *
+ * \note This function does an MSYNC after determining that the counter has hit
+ * zero to ensure that the data that was just DMA'd is available to all
+ * cores. The msync is only done if this is a reception counter group,
+ * since there is nothing to sync for injection counters that have hit zero.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetHitZero(
+ const DMA_CounterGroup_t *cg_ptr,
+ int reg
+ )
+{
+ unsigned int x;
+
+ SPI_assert( ( ( cg_ptr != NULL ) &&
+ ( ( reg == 0 ) || ( reg == 1 ) ) ) );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ x = cg_ptr->status_ptr->hit_zero[reg];
+
+ if ( x != 0 ) {
+
+ x &= cg_ptr->permissions[reg];
+
+ if ( ( cg_ptr->type == DMA_Type_Reception ) &&
+ ( x != 0 ) )
+ _bgp_msync_nonspeculative();
+
+ }
+
+ return (x);
+}
+
+
+/*!
+ * \brief Get Hit Zero Status of All DMA Counters In the Specified Group
+ *
+ * Get the "hit zero" status of all DMA counters in the group specified by the
+ * counter group structure.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when
+ * the counter was allocated.
+ * \param[in,out] word0 Pointer to the first status word, for the first 32
+ * counters.
+ * \param[in,out] word1 Pointer to the second status word, for the second 32
+ * counters.
+ *
+ * \return word0 and word1 are set to the status of the counters.
+ * Only the counters that the caller has allocated will have their
+ * status returned. The status for other counters will be 0.
+ *
+ * \note This function does an MSYNC after determining that at least 1 counter
+ * has hit zero to ensure that the data that was just DMA'd is available
+ * to all cores. The msync is only done if this is a reception counter
+ * group, since there is nothing to sync for injection counters that have
+ * hit zero.
+ *
+ */
+__INLINE__ void DMA_CounterGetAllHitZero(
+ const DMA_CounterGroup_t *cg_ptr,
+ unsigned int *word0,
+ unsigned int *word1
+ )
+{
+ unsigned int x,y;
+
+ SPI_assert( ( cg_ptr != NULL ) &&
+ ( word0 != NULL ) &&
+ ( word1 != NULL ) );
+ SPI_assert( cg_ptr->status_ptr != 0 );
+
+ x = cg_ptr->status_ptr->hit_zero[0];
+ y = cg_ptr->status_ptr->hit_zero[1];
+
+ if ( (x | y) != 0 ) {
+ x &= cg_ptr->permissions[0];
+ y &= cg_ptr->permissions[1];
+
+ if ( ( cg_ptr->type == DMA_Type_Reception ) &&
+ ( (x | y) != 0 ) )
+ _bgp_msync_nonspeculative();
+ }
+
+ *word0 = x;
+ *word1 = y;
+
+ return;
+}
+
+
+/*!
+ * \brief Clear Hit Zero Status of DMA Counters
+ *
+ * Clear the "hit zero" status of DMA counters, given a counter group structure,
+ * a "word", and a mask of counters.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counter was allocated.
+ * \param[in] reg Identifies the "word" (0 or 1) of the counters
+ * being manipulated. This is the index into the
+ * clear_hit_zero array.
+ * \param[in] counterBits Identifies which counters in the "word" are being
+ * manipulated.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the counter to ensure the
+ * writes have been accepted by the memory system before allowing other
+ * memory accesses to occur.
+ *
+ */
+__INLINE__ void DMA_CounterGroupClearHitZero(
+ DMA_CounterGroup_t *cg_ptr,
+ int reg,
+ unsigned int counterBits
+ )
+{
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( ( ( reg == 0 ) || ( reg == 1 ) ) );
+ SPI_assert( counterBits == (counterBits & cg_ptr->permissions[reg]) );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ cg_ptr->status_ptr->clear_hit_zero[reg] = counterBits;
+
+ _bgp_mbar(); /* Make sure these writes have been accepted by the memory */
+ /* system before continuing */
+}
+
+
+/*!
+ * \brief Get DMA Counter Group Status
+ *
+ * Get the DMA Counter Group Status, given a counter group structure.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counters were allocated.
+ *
+ * \return 32 bit mask indicating which subgroups have counters that are enabled and
+ * have hit zero. Only the subgroups that the caller has allocated will have
+ * their status returned. The status for other subgroups will be 0.
+ *
+ * \note This function does an MSYNC after determining that the counter has hit
+ * zero to ensure that the data that was just DMA'd is available to all
+ * cores. The msync is only done if this is a reception counter group,
+ * since there is nothing to sync for injection counters that have hit zero.
+ *
+ */
+__INLINE__ unsigned int DMA_CounterGetGroupStatus(
+ const DMA_CounterGroup_t *cg_ptr
+ )
+{
+ unsigned int x;
+
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( cg_ptr->status_ptr != 0);
+
+ x = cg_ptr->status_ptr->grp_status;
+
+ if ( x != 0 ) {
+
+ x &= cg_ptr->grp_permissions;
+
+ if ( ( cg_ptr->type == DMA_Type_Reception ) &&
+ ( x != 0 ) )
+ _bgp_msync_nonspeculative();
+
+ }
+
+ return x;
+}
+
+
+/*!
+ * \brief Get DMA Counter Group Number
+ *
+ * Get the DMA Counter Group number, given a counter group structure.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counters were allocated.
+ *
+ * \return The DMA Counter Group number
+ *
+ */
+__INLINE__ int DMA_CounterGetGroupNum(
+ const DMA_CounterGroup_t *cg_ptr
+ )
+{
+ SPI_assert( cg_ptr != NULL );
+
+ return cg_ptr->group_id;
+}
+
+
+/*!
+ * \brief Get DMA Counter Global Id
+ *
+ * Get the global Id of a DMA Counter, given a counter group structure and a counter Id.
+ *
+ * \param[in] cg_ptr Pointer to the structure previously filled in when the
+ * counters were allocated.
+ * \param[in] counter_id Identifier of the counter
+ *
+ * \return The DMA Counter Global Id (0 to DMA_NUM_COUNTERS-1)
+ *
+ */
+__INLINE__ int DMA_CounterGetGlobalId(
+ const DMA_CounterGroup_t *cg_ptr,
+ int counter_id
+ )
+{
+ SPI_assert( ( cg_ptr != NULL ) &&
+ ( counter_id >= 0 ) &&
+ ( counter_id < DMA_NUM_COUNTERS_PER_GROUP ) );
+
+ return( counter_id + (DMA_NUM_COUNTERS_PER_GROUP * cg_ptr->group_id) );
+}
+
+
+/*!
+ * \brief Get DMA Counter Local Id
+ *
+ * Get the local Id of a DMA Counter, given its Global
+ * counter Id.
+ *
+ * \param[in] counter_id Global Identifier of the counter (0 to DMA_NUM_COUNTERS-1)
+ *
+ * \return The DMA Counter Local Id (0 to DMA_NUM_COUNTERS_PER_GROUP-1)
+ *
+ */
+__INLINE__ int DMA_CounterGetLocalId(
+ int counter_id
+ )
+{
+ return( counter_id % DMA_NUM_COUNTERS_PER_GROUP );
+}
+
+
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_Descriptors.h b/arch/powerpc/include/spi/DMA_Descriptors.h
new file mode 100644
index 00000000000000..ae9fc11721ffa7
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Descriptors.h
@@ -0,0 +1,1505 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef _DMA_DESCRIPTORS_H_ /* Prevent multiple inclusion */
+#define _DMA_DESCRIPTORS_H_
+
+/*!
+ * \file spi/DMA_Descriptors.h
+ *
+ * \brief DMA SPI Descriptor Definitions and Inline Functions
+ *
+ * This header file contains the definition of the DMA_InjDescriptor_t, which is
+ * put into the tail of an injection fifo to initiate a DMA transfer.
+ *
+ * The following defines the terms used for describing the various kinds of
+ * descriptors:
+ * - "Torus" means the transfer is between nodes.
+ * - "Local" means the transfer is within the same node.
+ * - "Direct-put" means the data is put directly into the destination node's
+ * memory.
+ * - "MemFifo" means the packets are put into the destination node's reception
+ * fifo.
+ * - "Remote-get" means the packet payload contains an injection descriptor
+ * to be injected into the destination node's injection fifo.
+ * - "Prefetch-only" means the payload is just pre-fetched into L3.  It is not
+ *   transferred to the destination node.
+ *
+ * The following are the functions provided for creating injection descriptors:
+ * - DMA_TorusDirectPutDescriptor
+ * - DMA_LocalDirectPutDescriptor
+ * - DMA_LocalPrefetchOnlyDescriptor
+ * - DMA_TorusRemoteGetDescriptor
+ * - DMA_LocalRemoteGetDescriptor
+ * - DMA_TorusMemFifoDescriptor
+ * - DMA_LocalMemFifoDescriptor
+ * - DMA_TorusDirectPutBcastDescriptor
+ * - DMA_TorusMemFifoBcastDescriptor
+ *
+ *
+ * There are also functions for setting or changing specific values in the
+ * injection descriptors.
+ *
+ */
+
+
+
+
+#include <common/namespace.h>
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ * placed in the translation unit (eg. xxx.c)for use within that unit.
+ * The function is not externalized for use by another unit...we want this
+ * so we don't end up with multiple units exporting the same function,
+ * which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+ * \verbatim
+ #ifndef INLINE
+ # define INLINE extern inline
+ #endif
+ INLINE int max(int a, int b) {
+ return a > b ? a : b;
+ }
+ \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+ * \verbatim
+ #define INLINE
+ #include "header.h"
+ \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+
+#include <bpcore/bgp_types.h>
+#include <common/alignment.h>
+#include <common/bgp_bitnumbers.h>
+#include <spi/DMA_Packet.h>
+#include <spi/DMA_Assert.h>
+
+
+
+
+/*!
+ * \brief Packet Header - Checksum Skip Bytes
+ *
+ * Default number of 2 byte units to skip from the top of a packet before
+ * including the packet bytes into the running checksum of the torus
+ * injection fifo where this packet is injected.
+ *
+ * 8 corresponds to skipping 16 bytes, which is the DMA packet header size
+ * (hardware header + software header).
+ */
+#define DMA_CSUM_SKIP 8
+
+
+/*!
+ * \brief Packet Header - Checksum Skip Packet
+ *
+ * Default value for the torus injection checksum skip packet bit.
+ * - 0 includes the packet (excluding the portion designated by DMA_CSUM_SKIP)
+ * in the checksum.
+ * - 1 excludes the entire packet from the checksum.
+ */
+#define DMA_CSUM_BIT 0
+
+
+
+
/*!
 * \brief DMA Injection Descriptor Structure
 *
 * 32-byte descriptor placed into a DMA injection fifo to initiate a
 * transfer: 16 bytes of control information (flags, injection counter id,
 * payload offset, message length) followed by the 16-byte DMA packet
 * header (hardware header + software header).
 *
 * NOTE(review): the bitfield layout mirrors what the iDMA hardware
 * consumes — field order and widths must not change.
 */
typedef struct DMA_InjDescriptor_t
{
  union {
    unsigned word1;                 /*!< For accessing fields as 32-bit word */

    struct {
      unsigned rsvd0          : 24; /*!< 3 bytes:  unused                   */

      unsigned rsvd1          :  6; /*!< Bits 0-5: unused flags             */

      unsigned prefetch_only  :  1; /*!< Bit 6:    prefetch only, on local
                                                   memcopy:
                                                   0 = Data is both read and written,
                                                   1 = Data is only read.
                                                   This bit is ignored for torus
                                                   packets.                 */

      unsigned local_memcopy  :  1; /*!< Bit 7:    local memory copy bit:
                                                   0 = The message is a torus message,
                                                   1 = The message is a local copy. */
    };
  };

  union {
    unsigned word2;                 /*!< For accessing fields as 32-bit word */

    struct {
      unsigned rsvd2          : 24; /*!< 3 bytes:  unused                   */

      unsigned idma_counterId :  8; /*!< 1 byte:   Injection Counter Id.    */
    };
  };

  unsigned  base_offset       : 32; /*!< 4 bytes:  pointer to base address of
                                                   message payload.  This gets added to
                                                   the base address associated with the
                                                   idma_counterId injection counter.  */

  unsigned  msg_length        : 32; /*!< 4 bytes:  message length (in bytes) */

  DMA_PacketHeader_t hwHdr;         /*!< DMA Hardware Packet Header          */

}
DMA_InjDescriptor_t ALIGN_QUADWORD;
/*!
 * \todo Change to ALIGN_L1D_CACHE when it works.
 *
 */
+
+
/*!
 * \brief Static Info from Personality
 *
 * The following structure defines information from the personality
 * (the node's coordinates and the block's dimensions).
 * It is intended to be static so, once the info is retrieved from
 * the personality, it does not need to be retrieved again (it is a
 * system call to retrieve personality info).
 *
 */
typedef struct DMA_PersonalityInfo_t
{
  unsigned int personalityRetrieved;  /*!< Cache-validity flag:
                                           0 = Personality Info not
                                               retrieved into this
                                               structure yet.
                                           1 = Personality Info in this
                                               structure is valid.   */
  uint8_t      nodeXCoordinate;       /*!< X coord of the calling node. */
  uint8_t      nodeYCoordinate;       /*!< Y coord of the calling node. */
  uint8_t      nodeZCoordinate;       /*!< Z coord of the calling node. */
  uint8_t      xNodes;                /*!< X dimension of the block.    */
  uint8_t      yNodes;                /*!< Y dimension of the block.    */
  uint8_t      zNodes;                /*!< Z dimension of the block.    */
}
DMA_PersonalityInfo_t;
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Message
+ *
+ * A torus direct put message is one that is sent to another node and its data
+ * is directly put into memory by the DMA on the destination node...it does
+ * not go into a reception fifo.
+ *
+ * A torus direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = DMA_CSUM_SKIP.
+ * - Sk = DMA_CSUM_BIT.
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a direct-put packet).
+ * - Dynamic = Set based on caller's "vc".
+ * - VC = Set to caller's "vc".
+ * - X,Y,Z = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = Destination message offset (from the reception
+ * counter's base address). Set to caller's recv_offset.
+ * - rDMA_Counter = Reception counter ID. This counter is located on the
+ * destination node and contains the base address of the
+ * message and the message length. Set based on caller's
+ * recv_ctr_grp_id and recv_ctr_id.
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] x The destination's x coordinate (8 bits).
+ * \param[in] y The destination's y coordinate (8 bits).
+ * \param[in] z The destination's z coordinate (8 bits).
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Both of x+ and x- cannot be set at the same
+ * time...same with y and z.
+ * \param[in] vc The virtual channel that the packet must go
+ * into if it fails to win the bypass
+ * arbitration in the receiving node.
+ * - 0 = Virtual channel dynamic 0
+ * - 1 = Virtual channel dynamic 1
+ * - 2 = Virtual channel deterministic bubble
+ * - 3 = Virtual channel deterministic priority
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_ctr_grp_id Reception counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] recv_ctr_id Reception counter ID (within the recv counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] recv_offset Offset of the payload from the pa_base
+ * associated with the specified reception
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ * checksum. In the first byte of the torus hardware packet header,
+ * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ * The defaults can be changed by changing DMA_CSUM_SKIP and
+ * DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_ctr_grp_id:
+ * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+ */
+int DMA_TorusDirectPutDescriptor(
+ DMA_InjDescriptor_t *desc,
+ unsigned int x,
+ unsigned int y,
+ unsigned int z,
+ unsigned int hints,
+ unsigned int vc,
+ unsigned int inj_ctr_grp_id,
+ unsigned int inj_ctr_id,
+ unsigned int send_offset,
+ unsigned int recv_ctr_grp_id,
+ unsigned int recv_ctr_id,
+ unsigned int recv_offset,
+ unsigned int msg_len
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Direct Put Message
+ *
+ * A local direct put message is one that is targeted within the same node, and
+ * its data is directly put into memory by the DMA...it does not go into a
+ * reception fifo. This is essentially a memcpy via DMA.
+ *
+ * A local direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ * - Hint = 0 (not used).
+ * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id".
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a direct-put packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = Destination message offset (from the reception
+ * counter's base address). Set to caller's recv_offset.
+ * - rDMA_Counter = Reception counter ID. This counter is located on the
+ * destination node and contains the base address of the
+ *                     message and the message length.  Set based on caller's
+ * recv_ctr_grp_id and recv_ctr_id.
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_ctr_grp_id Reception counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] recv_ctr_id Reception counter ID (within the recv counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] recv_offset Offset of the payload from the pa_base
+ * associated with the specified reception
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_ctr_grp_id:
+ * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ * The only use for the pid bits is for debug, ie, if headers are
+ * being saved.
+ */
+int DMA_LocalDirectPutDescriptor(
+ DMA_InjDescriptor_t *desc,
+ unsigned int inj_ctr_grp_id,
+ unsigned int inj_ctr_id,
+ unsigned int send_offset,
+ unsigned int recv_ctr_grp_id,
+ unsigned int recv_ctr_id,
+ unsigned int recv_offset,
+ unsigned int msg_len
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local L3 Prefetch Only Message
+ *
+ * A local prefetch is one in which the DMA simply prefetches the send buffer
+ * into L3.
+ *
+ * A local prefetch DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 1
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the message being
+ * prefetched. This counter contains the base address of
+ * the message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ * - Hint = 0 (not used).
+ * - Pid0, Pid1 = 0 (not used).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a DMA packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = 0 (not used).
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc.
+ *
+ */
+int DMA_LocalPrefetchOnlyDescriptor(
+ DMA_InjDescriptor_t *desc,
+ unsigned int inj_ctr_grp_id,
+ unsigned int inj_ctr_id,
+ unsigned int send_offset,
+ unsigned int msg_len
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Remote-Get Message
+ *
+ * A torus remote-get message is one that is sent to another node and its data
+ * is directly put by the DMA into an injection fifo on the destination
+ * node...it does not go into a reception fifo. Therefore, the payload of this
+ * message is one (or more) descriptors for another message that is to be sent
+ * back to the originating node.
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor. Thus, Chunks = (2)-1 (64 byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * It is also assumed that the payload is NOT checksummed, since it is not
+ * always reproducible. Things like idma_counterId and base_offset may be
+ * different on another run, making checksumming inconsistent.
+ *
+ * A torus remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = 32.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used because Sk is 1).
+ * - Sk = 1 (do not checksum this packet).
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_inj_fifo_id" (see note).
+ * - Chunks = Set to (2)-1 = 1.
+ * - Dm = 1 (Indicates a DMA packet).
+ * - Dynamic = Set based on caller's "vc".
+ * - VC = Set to caller's "vc".
+ * - X,Y,Z = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 1.
+ * - iDMA_Fifo_ID = Injection fifo ID where the payload will be injected.
+ * Set based on caller's recv_inj_ctr_grp_id and
+ * recv_inj_ctr_id.
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] x The destination's x coordinate (8 bits).
+ * \param[in] y The destination's y coordinate (8 bits).
+ * \param[in] z The destination's z coordinate (8 bits).
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Both of x+ and x- cannot be set at the same
+ * time...same with y and z.
+ * \param[in] vc The virtual channel that the packet must go
+ * into if it fails to win the bypass
+ * arbitration in the receiving node.
+ * - 0 = Virtual channel dynamic 0
+ * - 1 = Virtual channel dynamic 1
+ * - 2 = Virtual channel deterministic bubble
+ * - 3 = Virtual channel deterministic priority
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_inj_fifo_grp_id Injection fifo group ID where payload
+ * will be injected on destination node
+ * (0 to DMA_NUM_INJ_FIFO_GROUPS-1).
+ * \param[in] recv_inj_fifo_id Injection fifo ID (within the
+ * recv_inj_fifo_grp_id group)
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_inj_fifo_grp_id:
+ * - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+ */
+int DMA_TorusRemoteGetDescriptor(
+ DMA_InjDescriptor_t *desc,
+ unsigned int x,
+ unsigned int y,
+ unsigned int z,
+ unsigned int hints,
+ unsigned int vc,
+ unsigned int inj_ctr_grp_id,
+ unsigned int inj_ctr_id,
+ unsigned int send_offset,
+ unsigned int recv_inj_fifo_grp_id,
+ unsigned int recv_inj_fifo_id
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Remote-Get Message
+ *
+ * A local remote-get message is one whose data is directly put by the DMA into
+ * an injection fifo on the local node...it does not go into a reception fifo.
+ * Therefore, the payload of this message is one (or more) descriptors for
+ * another message that is to be injected on the local node.
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor. Thus, Chunks = (2)-1 (64 byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * A local remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = 32.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ *   - Hint          = 0 (not used).
+ * - Pid0, Pid1 = Set based on caller's "recv_inj_fifo_id" (see note).
+ * - Chunks = Set to (2)-1 = 1.
+ * - Dm = 1 (Indicates a DMA packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 1.
+ * - iDMA_Fifo_ID = Injection fifo ID where the payload will be injected.
+ * Set based on caller's inj_ctr_grp_id and inj_ctr_id.
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_inj_fifo_grp_id Injection fifo group ID where payload
+ * will be injected on local node
+ * (0 to DMA_NUM_INJ_FIFO_GROUPS-1).
+ * \param[in] recv_inj_fifo_id Injection fifo ID (within the
+ * recv_inj_fifo_grp_id group)
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the
+ * hardware packet header are determined by the recv_inj_fifo_grp_id:
+ * - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *
+ */
+int DMA_LocalRemoteGetDescriptor(
+ DMA_InjDescriptor_t *desc,
+ unsigned int inj_ctr_grp_id,
+ unsigned int inj_ctr_id,
+ unsigned int send_offset,
+ unsigned int recv_inj_fifo_grp_id,
+ unsigned int recv_inj_fifo_id
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Memory Fifo Message
+ *
+ * A torus memory fifo message is one that is sent to another node and its data
+ * is put into a reception memory fifo by the DMA on the destination node.
+ *
+ * A torus memory fifo DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = DMA_CSUM_SKIP.
+ * - Sk = DMA_CSUM_BIT.
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 0 (Indicates a memory fifo packet).
+ * - Dynamic = Set based on caller's "vc".
+ * - VC = Set to caller's "vc".
+ * - X,Y,Z = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = 0 (not used).
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg.
+ * - Func_Id = The registration ID of a function to receive control
+ * on the destination node to process the packet.
+ * Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] x The destination's x coordinate (8 bits).
+ * \param[in] y The destination's y coordinate (8 bits).
+ * \param[in] z The destination's z coordinate (8 bits).
+ * \param[in] recv_fifo_grp_id Reception fifo group ID
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Both of x+ and x- cannot be set at the same
+ * time...same with y and z.
+ * \param[in] vc The virtual channel that the packet must go
+ * into if it fails to win the bypass
+ * arbitration in the receiving node.
+ * - 0 = Virtual channel dynamic 0
+ * - 1 = Virtual channel dynamic 1
+ * - 2 = Virtual channel deterministic bubble
+ * - 3 = Virtual channel deterministic priority
+ * \param[in] sw_arg User-defined 24 bits to be placed into the
+ * packets (bits 8-31).
+ * \param[in] function_id Function id (8 bit registration ID) of the
+ * function to receive control on the
+ * destination node to process packets for this
+ * message.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ * checksum. In the first byte of the torus hardware packet header,
+ * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ * The defaults can be changed by changing DMA_CSUM_SKIP and
+ * DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_fifo_grp_id:
+ * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+*/
+int DMA_TorusMemFifoDescriptor(
+ DMA_InjDescriptor_t *desc, /* [in,out] Storage for the descriptor */
+ unsigned int x, /* [in] Destination torus x coordinate */
+ unsigned int y, /* [in] Destination torus y coordinate */
+ unsigned int z, /* [in] Destination torus z coordinate */
+ unsigned int recv_fifo_grp_id, /* [in] Reception fifo group ID */
+ unsigned int hints, /* [in] Torus routing hint bits (6 bits) */
+ unsigned int vc, /* [in] Virtual channel */
+ unsigned int sw_arg, /* [in] User-defined 24 bits */
+ unsigned int function_id, /* [in] Receive function registration ID */
+ unsigned int inj_ctr_grp_id, /* [in] Injection counter group ID */
+ unsigned int inj_ctr_id, /* [in] Injection counter ID within group */
+ unsigned int send_offset, /* [in] Payload offset from counter pa_base */
+ unsigned int msg_len /* [in] Total message length in bytes */
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Memory Fifo Message
+ *
+ * A local memory fifo message is one whose data is put into a reception
+ * memory fifo on the same node by the DMA.
+ *
+ * A local memory fifo DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ * - Hint = 0 (not used).
+ * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 0 (Indicates a memory fifo packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = 0 (not used).
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg.
+ * - Func_Id = The registration ID of a function to receive control
+ * on this local node to process the packet.
+ * Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] recv_fifo_grp_id Reception fifo group ID
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] sw_arg User-defined 24 bits to be placed into the
+ * packets (bits 8-31).
+ * \param[in] function_id Function id (8 bit registration ID) of the
+ * function to receive control on this
+ * local node to process packets for this
+ * message.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209 or more bytes of payload + 16-byte header).
+ * - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for local memory fifo DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_fifo_grp_id:
+ * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+*/
+int DMA_LocalMemFifoDescriptor(
+ DMA_InjDescriptor_t *desc, /* [in,out] Storage for the descriptor */
+ unsigned int recv_fifo_grp_id, /* [in] Reception fifo group ID */
+ unsigned int sw_arg, /* [in] User-defined 24 bits */
+ unsigned int function_id, /* [in] Receive function registration ID */
+ unsigned int inj_ctr_grp_id, /* [in] Injection counter group ID */
+ unsigned int inj_ctr_id, /* [in] Injection counter ID within group */
+ unsigned int send_offset, /* [in] Payload offset from counter pa_base */
+ unsigned int msg_len /* [in] Total message length in bytes */
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Broadcast Message
+ *
+ * A torus direct put broadcast message is one that is sent to all of the nodes
+ * in a specified direction along a specified line, its data
+ * is directly put into memory on the nodes along that line by the DMA on those
+ * nodes...it does not go into a reception fifo. Only one hint bit can be
+ * specified, dictating the direction (plus or minus) and line (x, y, or z).
+ *
+ * By default, the packet is included in the checksum. Retransmitted packets
+ * should not be included in the checksum.
+ *
+ * By default, the deterministic bubble normal virtual channel is used.
+ *
+ * A torus direct-put broadcast DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = DMA_CSUM_SKIP.
+ * - Sk = DMA_CSUM_BIT.
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a direct-put packet).
+ * - Dynamic = 0 (Deterministic).
+ * - VC = Virtual Channel: Deterministic Bubble Normal.
+ * - X,Y,Z = Set according to the hints:
+ * Two of the directions are set to this node's
+ * coordinates (no movement in those directions).
+ * One direction is set to the dest specified
+ * by the caller.
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = Destination message offset (from the reception
+ * counter's base address). Set to caller's recv_offset.
+ * - rDMA_Counter = Reception counter ID. This counter is located on the
+ * destination node and contains the base address of the
+ * message and the message length. Set based on caller's
+ * recv_ctr_grp_id and recv_ctr_id.
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] dest The final torus destination coordinate
+ * along the line specified by the hints.
+ * Should not exceed the number of nodes in
+ * the direction of travel.
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Only one bit may be specified.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_ctr_grp_id Reception counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] recv_ctr_id Reception counter ID (within the recv counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] recv_offset Offset of the payload from the pa_base
+ * associated with the specified reception
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ * checksum. In the first byte of the torus hardware packet header,
+ * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ * The defaults can be changed by changing DMA_CSUM_SKIP and
+ * DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209 or more bytes of payload + 16-byte header).
+ * - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_ctr_grp_id:
+ * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+*/
+int DMA_TorusDirectPutBcastDescriptor(
+ DMA_InjDescriptor_t *desc, /* [in,out] Storage for the descriptor */
+ unsigned int dest, /* [in] Final destination coordinate on the line */
+ unsigned int hints, /* [in] Routing hint bits (exactly one may be set) */
+ unsigned int inj_ctr_grp_id, /* [in] Injection counter group ID */
+ unsigned int inj_ctr_id, /* [in] Injection counter ID within group */
+ unsigned int send_offset, /* [in] Send payload offset from counter pa_base */
+ unsigned int recv_ctr_grp_id, /* [in] Reception counter group ID */
+ unsigned int recv_ctr_id, /* [in] Reception counter ID within group */
+ unsigned int recv_offset, /* [in] Destination offset from counter pa_base */
+ unsigned int msg_len /* [in] Total message length in bytes */
+ );
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Memory Fifo Broadcast Message
+ *
+ * A torus memory fifo broadcast message is one that is sent to all of the nodes
+ * in a specified direction along a specified line, its data is
+ * put into a reception memory fifo by the DMA on the destination nodes along
+ * that line. Only one hint bit can be specified, dictating the direction
+ * (plus or minus) and line (x, y, or z).
+ *
+ * By default, the packet is included in the checksum. Retransmitted packets
+ * should not be included in the checksum.
+ *
+ * By default, the deterministic bubble normal virtual channel is used.
+ *
+ * A torus memory fifo broadcast DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = DMA_CSUM_SKIP.
+ * - Sk = DMA_CSUM_BIT.
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 0 (Indicates a memory fifo packet).
+ * - Dynamic = 0 (Deterministic).
+ * - VC = Virtual Channel: Deterministic Bubble Normal.
+ * - X,Y,Z = Set according to the hints:
+ * Two of the directions are set to this node's
+ * coordinates (no movement in those directions).
+ * One direction is set to the dest specified
+ * by the caller.
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = 0 (not used).
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg.
+ * - Func_Id = The registration ID of a function to receive control
+ * on the destination node to process the packet.
+ * Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] dest The final torus destination coordinate
+ * along the line specified by the hints.
+ * Should not exceed the number of nodes in
+ * the direction of travel.
+ * \param[in] recv_fifo_grp_id Reception fifo group ID
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Only one bit may be specified.
+ * \param[in] sw_arg User-defined 24 bits to be placed into the
+ * packets (bits 8-31).
+ * \param[in] function_id Function id (8 bit registration ID) of the
+ * function to receive control on the
+ * destination node to process packets for this
+ * message.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ * checksum. In the first byte of the torus hardware packet header,
+ * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ * The defaults can be changed by changing DMA_CSUM_SKIP and
+ * DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209 or more bytes of payload + 16-byte header).
+ * - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for memory fifo DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_fifo_grp_id:
+ * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+*/
+int DMA_TorusMemFifoBcastDescriptor(
+ DMA_InjDescriptor_t *desc, /* [in,out] Storage for the descriptor */
+ unsigned int dest, /* [in] Final destination coordinate on the line */
+ unsigned int recv_fifo_grp_id, /* [in] Reception fifo group ID */
+ unsigned int hints, /* [in] Routing hint bits (exactly one may be set) */
+ unsigned int sw_arg, /* [in] User-defined 24 bits */
+ unsigned int function_id, /* [in] Receive function registration ID */
+ unsigned int inj_ctr_grp_id, /* [in] Injection counter group ID */
+ unsigned int inj_ctr_id, /* [in] Injection counter ID within group */
+ unsigned int send_offset, /* [in] Payload offset from counter pa_base */
+ unsigned int msg_len /* [in] Total message length in bytes */
+ );
+
+
+/*!
+ * \brief Set or Change the Hint Bits in a Fifo Descriptor
+ *
+ * \param[in,out] desc Pointer to descriptor to be set or changed.
+ * \param[in] hints Hint bits to be set.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetHints(
+ DMA_InjDescriptor_t *desc,
+ unsigned int hints
+ )
+{
+ SPI_assert( desc != NULL );
+ desc->hwHdr.Hint = hints; /* hint bits in the torus hardware header */
+
+}
+
+
+/*!
+ * \brief Set or Change the Virtual Channel and Dynamic Bit in a Descriptor
+ *
+ * \param[in,out] desc Pointer to descriptor to be set or changed.
+ * \param[in] vc Input virtual channel
+ * - 0 = Virtual channel dynamic 0
+ * - 1 = Virtual channel dynamic 1
+ * - 2 = Virtual channel deterministic bubble
+ * - 3 = Virtual channel deterministic priority
+ *
+ * \return None
+ *
+ * \post The Dynamic bit is set according to the specified virtual channel.
+ *
+ */
+__INLINE__ void DMA_SetVc(
+ DMA_InjDescriptor_t *desc,
+ unsigned int vc
+ )
+{
+ SPI_assert( desc != NULL );
+
+ switch(vc) {
+ case DMA_PACKET_VC_D0: /* dynamic virtual channels... */
+ case DMA_PACKET_VC_D1:
+ desc->hwHdr.Dynamic =1; /* ...set the Dynamic bit */
+ break;
+
+ case DMA_PACKET_VC_BN: /* deterministic (bubble/priority) channels... */
+ case DMA_PACKET_VC_BP:
+ desc->hwHdr.Dynamic =0; /* ...clear the Dynamic bit */
+ break;
+
+ default:
+ SPI_assert(0); /* invalid virtual channel */
+ }
+ desc->hwHdr.VC = vc;
+
+}
+
+
+/*!
+ * \brief Set Descriptor Pid Bits
+ *
+ * Given a pointer to the descriptor and the receive-side counter group number,
+ * set the Pid0 and Pid1 bits in the torus hardware header portion of the
+ * descriptor.
+ *
+ * \param[in] desc Pointer to injection descriptor
+ * \param[in] g Reception-side counter group number
+ * (0 through DMA_NUM_COUNTER_GROUPS-1).
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetDescriptorPids(
+ DMA_InjDescriptor_t *desc,
+ unsigned int g
+ )
+{
+ /* Set the pid bits according to the group id g */
+ desc->hwHdr.Pid0 = _GN(g,30); /* Pid0 = bit 30 of g (the 2's bit) */
+ desc->hwHdr.Pid1 = _GN(g,31); /* Pid1 = bit 31 of g (the 1's bit) */
+/* ---------------------------------
+ The above code performs the following:
+
+ switch(g) {
+ case 0:
+ desc->hwHdr.Pid0 = 0;
+ desc->hwHdr.Pid1 = 0;
+ break;
+
+ case 1:
+ desc->hwHdr.Pid0 = 0;
+ desc->hwHdr.Pid1 = 1;
+ break;
+
+ case 2:
+ desc->hwHdr.Pid0 = 1;
+ desc->hwHdr.Pid1 = 0;
+ break;
+
+ case 3:
+ desc->hwHdr.Pid0 = 1;
+ desc->hwHdr.Pid1 = 1;
+ break;
+
+ default:
+ SPI_assert(0);
+
+ }
+ --------------------------------- */
+}
+
+
+/*!
+ * \brief Set or Change the Number of Chunks in a Fifo Descriptor
+ *
+ * \param[in,out] desc Pointer to the descriptor to be set or
+ * changed.
+ * \param[in] packet_chunks Number of 32B chunks in the packet
+ * (1 through 8).
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetChunks(
+ DMA_InjDescriptor_t *desc,
+ unsigned int packet_chunks
+ )
+{
+ SPI_assert( desc != NULL );
+ SPI_assert( packet_chunks >=1);
+ SPI_assert( packet_chunks <=8);
+ desc->hwHdr.Chunks = (packet_chunks-1) ; /* hardware encodes chunks minus 1 (0-7) */
+}
+
+
+/*!
+ * \brief Set or Change the Message Length in a Fifo Descriptor
+ *
+ * \param[in,out] desc Pointer to the descriptor to be set or changed.
+ * \param[in] msg_len Number of bytes in the payload of the message.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetMessageLength(
+ DMA_InjDescriptor_t *desc,
+ unsigned int msg_len
+ )
+{
+ SPI_assert( desc != NULL );
+
+ desc->msg_length= msg_len; /* msg_length field of the 16B control info */
+}
+
+
+/*!
+ * \brief Change the Checksum Characteristics in a Fifo Descriptor
+ *
+ * \param[in,out] desc Pointer to the descriptor to be changed.
+ * \param[in] csum_skip The number of 2-byte units to skip in the checksum
+ * (7 bits).
+ * \param[in] skip The checksum skip attribute:
+ * 0 = The packet participates in the injection
+ * checksum.
+ * 1 = The packet does not participate in the
+ * injection checksum.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_SetInjCsum(
+ DMA_InjDescriptor_t *desc,
+ unsigned int csum_skip,
+ unsigned int skip
+ )
+{
+ SPI_assert( desc != NULL );
+ SPI_assert( skip <=1 );
+
+ desc->hwHdr.CSum_Skip = csum_skip; /* number of 2-byte units to skip */
+ desc->hwHdr.Sk = skip; /* 1 = exclude packet from injection checksum */
+
+}
+
+
+/*!
+ * \brief Determine the Number of Packet Chunks for the First Packet of a
+ * Message
+ *
+ * Compute the best (largest) packet size in units of 32B chunks given the
+ * message length.
+ *
+ * \param[in] msg_len Message length
+ *
+ * \retval numPacketChunks Number of 32B chunks needed in the first packet
+ * of a message whose length is msg_len.
+ * This will be a number from 1 through 8.
+ * \retval 0 This is considered an error, resulting from a
+ * msg_len = 0. The DMA must send at least 1 byte.
+ */
+__INLINE__ int DMA_PacketChunks(
+ unsigned msg_len
+ )
+{
+ /* Do most common case first */
+ if (msg_len > 208) return 8; /* 7 chunks hold at most 7*32 - 16 = 208 payload bytes */
+
+ /* Error case...the DMA must send at least one byte of data */
+ SPI_assert( msg_len > 0);
+
+ /* Basically add in the packet header and round to 32B multiple */
+ {
+ int chunks = ( msg_len - 1 + sizeof(DMA_PacketHeader_t) ) / 32; /* full chunks beyond the first */
+ return (1+chunks);
+ }
+
+}
+
+
+/*!
+ * \brief Zero Out All Fields a Descriptor
+ *
+ * \param[in] desc Pointer to descriptor to be zero'd.
+ *
+ * \post The descriptor is zero'd.
+ *
+ */
+__INLINE__ void DMA_ZeroOutDescriptor(
+ DMA_InjDescriptor_t *desc
+ )
+{
+ /*
+ * Possible optimizations:
+ * There are 32 bytes in the descriptor and it should be L1 aligned.
+ * SPI_assert(( desc & 0x000000FF) == 0); // check alignment, not needed if can't
+ * // easily use double hummer.
+ * _bgp_dcache_zero_line(desc); //Not allowed with SWOA
+ * Should be a better way to do this.
+ */
+
+ SPI_assert( desc != NULL );
+
+ {
+ int *addr = (int *) desc ; /* 32-byte descriptor = 8 ints */
+
+ /* Generates 8 stw's */
+ addr[0] = 0;
+ addr[1] = 0;
+ addr[2] = 0;
+ addr[3] = 0;
+ addr[4] = 0;
+ addr[5] = 0;
+ addr[6] = 0;
+ addr[7] = 0;
+ }
+
+}
+
+
+
+/*!
+ * \brief Update the Offset and Length in a Descriptor
+ *
+ * \param[in] desc Pointer to descriptor to be updated.
+ * \param[in] offset The new offset value.
+ * \param[in] length The new length value.
+ *
+ * \post The descriptor is updated.
+ *
+ */
+__INLINE__ void DMA_DescriptorUpdateOffsetLength (DMA_InjDescriptor_t *desc,
+ unsigned offset,
+ unsigned length)
+{
+ desc->base_offset = offset; /* offset from the injection counter's base */
+ desc->msg_length = length; /* message length in bytes */
+}
+
+
+
+/*!
+ * \brief Set the Put Offset in a Descriptor
+ *
+ * This sets the "put_offset" field of the software packet header in the
+ * provided descriptor. This field is placed into the packet header by
+ * the DMA. In the first packet, this field is placed into the packet
+ * unchanged. In each subsequent packet, the DMA adds to this field
+ * the number of payload bytes from the previous packet.
+ *
+ * \param[in] desc Pointer to descriptor.
+ * \param[in] offset The offset value to be set.
+ *
+ * \post The Put Offset in the descriptor is set.
+ *
+ */
+__INLINE__ void DMA_DescriptorSetPutOffset (DMA_InjDescriptor_t *desc,
+ unsigned offset)
+{
+ desc->hwHdr.Put_Offset = offset; /* Put_Offset field of the software header */
+}
+
+__END_DECLS
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_Fifo.h b/arch/powerpc/include/spi/DMA_Fifo.h
new file mode 100644
index 00000000000000..c8e7f9e0e66805
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Fifo.h
@@ -0,0 +1,1011 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef _DMA_FIFO_H_ /* Prevent multiple inclusion */
+#define _DMA_FIFO_H_
+
+
+/*!
+ * \file spi/DMA_Fifo.h
+ *
+ * \brief DMA SPI Fifo Definitions and Inline Functions Common to Injection
+ * and Reception Fifos
+ *
+ * This include file contains data structures and inline functions that are
+ * common among injection and reception fifos. The inlines are used to
+ * interface with the fifos at the lowest level.
+ *
+ * There are two levels of access: hardware and software. For direct
+ * hardware access, the DMA_FifoHW_t structure describes fields that reside
+ * in the "hardware fifo" in DMA SRAM. For normal software access, the
+ * DMA_Fifo_t structure contains a pointer to the hardware structure,
+ * shadows (snapshot copies) of the fields in the hardware structure, and
+ * size information calculated from the shadows.
+ *
+ * \verbatim Picture of fifo structures
+
+ ========DDR MEMORY===================|==========DMA SRAM MEMORY==========
+ ------------------------------ |
+ | DMA_Fifo_t | |
+ | | |
+ | Software Fifo | |
+ | | |
+ | | | -----------------------------
+ | fifo_hw_ptr--------------|-------|---->| DMA_FifoHW_t |
+ | | | | |
+ | | | | Hardware Fifo |
+ | Shadow Pointers | | -----------------------------
+ | . | |
+ ------------------------------ |
+
+ \endverbatim
+ *
+ * For normal messaging software, one should access the DMA using the
+ * DMA_Fifo_t, DMA_InjFifo_t, or DMA_RecFifo_t structures since
+ * they maintain shadows. This include file contains inline functions that
+ * operate on the DMA_Fifo_t for this purpose. Functions include:
+ * - get va_start, va_head, va_tail, va_end, fifo size, fifo free_space
+ * - set va_head, va_tail
+ * - update fifo free-space based upon current shadows
+ *
+ * However, for bringup or diagnostic software, there is a need for direct
+ * access to the hardware fifos. This include file contains functions that
+ * operate on the DMA_FifoHW_t for this purpose. Functions include:
+ * - get pa_start, pa_head, pa_tail, pa_end
+ * - set pa_start, pa_head, pa_tail, pa_end
+ * It probably doesn't make sense to have a stand-alone
+ * DMA_FifoSetStartPa() or DMA_FifoSetEndPa() since this dynamically
+ * messes up the fifo, causing unpredictable results. But bringup or
+ * diagnostic software will need this (with dma disabled, or the fifo
+ * disabled). Therefore we provide direct interfaces using physical
+ * addresses and no shadows (for speed).
+ *
+ * Definitions:
+ * - A fifo represents a contiguous block of DDR memory
+ * - A fifo has a starting address and an ending address (defines the memory
+ * block)
+ * - An injection fifo is a series of 32-byte descriptors.
+ * - Injection consists of copying a 32-byte descriptor into the next available
+ * slot (pointed to by the tail) and incrementing the tail pointer.
+ * - The DMA engine asynchronously processes descriptors, beginning with the
+ * descriptor pointed to by head, and ending with the descriptor just prior
+ * to tail.
+ * - There are injection (DMA InjFifo) and reception (DMA RecFifo) fifos
+ * (separate interfaces)
+ * - There are DMA_NUM_INJ_FIFO_GROUPS injection fifo groups
+ * - There are DMA_NUM_INJ_FIFOS_PER_GROUP injection fifos per group
+ * - Thus, there are DMA_NUM_INJ_FIFOS injection fifos per node
+ * - There are DMA_NUM_REC_FIFO_GROUPS reception fifo groups
+ * - There are DMA_NUM_REC_FIFOS_PER_GROUP reception fifos per group
+ * - Thus, there are DMA_NUM_REC_FIFOS reception fifos per node
+ * - A "shadow" refers to a copy of the elements of the fifo (start, end, head,
+ * tail) that is maintained by these inline functions. The shadows may be
+ * used to calculate other values such as free space. The shadows are updated
+ * by these inlines whenever the hardware fifo is read or written.
+ *
+ * \note Memory consistency/coherency inside these inlines is achieved using
+ * mbar and msync.
+ *
+ * MBAR is used to make sure that all writes to memory issued by the
+ * calling core have been accepted by the memory system before
+ * continuing. This guarantees that writes and reads to/from different
+ * addresses to go in defined order.
+ *
+ * MBAR EXAMPLE 1: When a store is done to DMA SRAM, it may not complete
+ * for a period of time. If a counter value is set, and then an injection
+ * fifo tail pointer is set, DMA may see the tail pointer update and begin
+ * the operation before the counter value has been set. Inserting an mbar
+ * between the setting of the counter and the setting of the tail pointer
+ * guarantees that the counter will be set before the tail pointer is
+ * updated.
+ *
+ * MBAR EXAMPLE 2: A counter hits zero. We process the hit-zero and write
+ * a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero
+ * status (different address). The hit-zero status will still indicate
+ * that it hit zero, even though we have already processed it, unless an
+ * mbar is inserted between clearing the hit-zero and reading the hit-zero
+ * status.
+ *
+ * MBAR PHILOSOPHY: After DMA SRAM is updated in the DMA inline functions,
+ * they always do at least an mbar (possibly an msync instead...see below).
+ *
+ * MSYNC does what mbar does, plus ensures consistency across cores. That
+ * is, it waits for snoops (invalidations of L1 cache) on the other cores
+ * to complete before continuing. This guarantees that all of the cores
+ * will see a consistent view of memory after the msync.
+ *
+ * MSYNC EXAMPLE: When a reception counter has hit zero, we assume the
+ * DMA'd data is available to be read by any core. However, old copies of
+ * that data may still be in the L1 caches. Inserting an msync after
+ * detecting that a counter has hit zero guarantees that the old data has
+ * been removed from the L1 caches.
+ *
+ * MSYNC PHILOSOPHY: After the inline functions detect that a counter has
+ * hit zero, they always do an msync.
+ *
+ * SPECULATIVE EXECUTION OF MSYNC: There are cases where msync is done
+ * conditionally. The CPU will begin execution of both sides of the
+ * condition before the result of the condition has been determined.
+ * Then, it will cancel the execution of one side once the result of the
+ * condition has been determined. This speculation is unwanted when
+ * the first instruction on one side of the condition is msync because
+ * cancelling an msync is similar to executing the complete msync.
+ * To avoid this speculative execution of msync, we call
+ * _bgp_msync_nonspeculative(). This will trick the CPU so it won't begin
+ * the msync until the result of the condition is known.
+ *
+ * CALLER ADVICE: Users of these functions should not need to do
+ * mbar/msync themselves, unless they are doing something like the
+ * following: Read a counter and operate on the result when the counter
+ * hasn't reached zero. The caller will need to perform an msync after
+ * reading the counter in order to ensure that snoops have completed
+ * on all CPUs before operating on the DMA'd data.
+ */
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ * placed in the translation unit (eg. xxx.c) for use within that unit.
+ * The function is not externalized for use by another unit...we want this
+ * so we don't end up with multiple units exporting the same function,
+ * which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+ \verbatim
+ #ifndef INLINE
+ # define INLINE extern inline
+ #endif
+ INLINE int max(int a, int b) {
+ return a > b ? a : b;
+ }
+ \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+ \verbatim
+ #define INLINE
+ #include "header.h"
+ \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+
+#include <spi/DMA_Assert.h>
+#include <spi/kernel_interface.h>
+
+
+
+/*!
+ * \brief Number of fifo groups
+ */
+#define DMA_NUM_FIFO_GROUPS 4
+
+
+/*!
+ * \brief Hardware DMA Fifo
+ *
+ * This maps the hardware fifo (the DMA SRAM) for a fifo. These fields are
+ * common for injection and reception fifos.
+ *
+ * The fifo represents a physically contiguous block of memory.
+ *
+ */
+typedef struct DMA_FifoHW_t
+{
+ volatile unsigned pa_start; /*!< RW fifo start address.
+ 16B-aligned 4-bit shifted address (PA>>4). */
+
+ volatile unsigned pa_end; /*!< RW fifo end address.
+ 16B-aligned 4-bit shifted address (PA>>4). */
+
+ volatile unsigned pa_head; /*!< RW fifo head pointer.
+ 16B-aligned 4-bit shifted address.
+ Injection fifo head moved by DMA.
+ Reception fifo head moved by cores.
+ Remote get injection fifo head moved
+ by DMA. */
+
+ volatile unsigned pa_tail; /*!< RW fifo tail pointer.
+ 16B-aligned 4-bit shifted address.
+ Injection fifo tail moved by cores.
+ Reception fifo tail moved by DMA.
+ Remote get injection fifo tail moved
+ by DMA. */
+}
+DMA_FifoHW_t;
+
+
+/*!
+ * \brief Software DMA Fifo structure
+ *
+ * This structure contains a pointer to the hardware fifo, and other fields that
+ * describe software's view of the fifo. These fields are common for injection
+ * and reception fifos.
+ *
+ * The va_* fields and free_space are "shadows": cached copies derived from
+ * the hardware fifo, refreshed by the inline accessors in this file.
+ *
+ * \todo Some more careful thought should be given how to group these so as to
+ * get best memory system performance.
+ * eg. Probably want to ALIGN_L3_CACHE the fifo_hw_ptr.
+ * Might want to have an assert to check that sizeof( DMA_Fifo_t)
+ * is 32.
+ * COMMENT: I think below definition puts the entire structure in one
+ * L1 line.
+ */
+typedef struct DMA_Fifo_t
+{
+ DMA_FifoHW_t *fifo_hw_ptr; /*!< Pointer to hardware fifo. */
+
+ unsigned int free_space; /*!< Shadow of how much free space is in the
+ fifo, in units of 16B quads. */
+
+ unsigned int fifo_size; /*!< Shadow of how much total space is in the
+ fifo, in units of 16B quads. */
+
+ unsigned int pa_start; /*!< Physical address of the start. (shadow)
+ 16B-aligned 4-bit shifted address.
+ Enables simple calculation of va_head,
+ va_tail, and va_end. */
+ /*!
+ * \note The following 4 fields are shadows of the hardware fifo.
+ * They should be in the same L1 cache line for performance.
+ * They are updated by the inline functions in this file upon each
+ * read or write to the fifo.
+ */
+ void *va_start; /*!< Shadow of the virtual address start of
+ the fifo. Must be 32B aligned. */
+
+ void *va_head; /*!< Shadow of the virtual address head of
+ the fifo. */
+
+ void *va_tail; /*!< Shadow of the virtual address tail of
+ the fifo. */
+
+ void *va_end; /*!< Shadow of the virtual address end of
+ the fifo. Must be 32B aligned. */
+
+}
+/*!
+ * With above, there should be 8 fields x 4 bytes/field = 32 bytes in the
+ * structure. Below ensures these 32 bytes are in the same cache line.
+ */
+ALIGN_L1D_CACHE DMA_Fifo_t;
+
+/*
+ *------------------------------------------------------------------------------
+ * The following functions operate on fields in the hardware and software fifo
+ * structures.
+ *------------------------------------------------------------------------------
+ */
+
+
+/*!
+ * \brief Update DMA Fifo Free Space from the Shadow
+ *
+ * Recompute the amount of free space in a DMA fifo using only the shadow
+ * head and tail values held in the software fifo structure.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \return None
+ *
+ * \note WARNING: The calculation is based on the current shadow values of
+ *       the head and tail, not the actual hardware values.
+ */
+__INLINE__ void DMA_FifoUpdateFreeSpaceFromShadow( DMA_Fifo_t *f_ptr )
+{
+  unsigned int head, tail;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  head = (unsigned)(f_ptr->va_head);
+  tail = (unsigned)(f_ptr->va_tail);
+
+  /* Free space, in 16B quads.  When the tail is at or beyond the head,
+   * the occupied region is [head,tail); otherwise the fifo has wrapped
+   * and the free region is [tail,head).
+   */
+  f_ptr->free_space = ( tail >= head ) ?
+                        ( f_ptr->fifo_size - ( ( tail - head ) >> 4 ) ) :
+                        ( ( head - tail ) >> 4 );
+}
+
+
+/*!
+ * \brief Get DMA Fifo Start Virtual Address from the Shadow
+ *
+ * Return the shadow "start" virtual address of a DMA fifo.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval va_start  The virtual address of the start of the fifo
+ *
+ * \note WARNING: No DMA SRAM access is performed; the current shadow
+ *       va_start is returned as-is.  To actually issue a read to the
+ *       DMA, use DMA_FifoGetStartPa().
+ */
+__INLINE__ void * DMA_FifoGetStartFromShadow( DMA_Fifo_t *f_ptr )
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  return f_ptr->va_start;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Head Virtual Address
+ *
+ * Read the hardware head pointer of a DMA fifo and return the
+ * corresponding virtual address.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval va_head  The virtual address of the head of the fifo
+ *
+ * \post The shadow va_head is recalculated from the current hardware head
+ *       and the shadow free space is updated.
+ */
+__INLINE__ void * DMA_FifoGetHead( DMA_Fifo_t *f_ptr )
+{
+  unsigned int hw_head;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /* Fetch the 4-bit-shifted head from the DMA SRAM and convert it to a
+   * virtual address as an offset from va_start.
+   */
+  hw_head = f_ptr->fifo_hw_ptr->pa_head;
+
+  f_ptr->va_head = (char*)(f_ptr->va_start) +
+                   ( ( hw_head - f_ptr->pa_start ) << 4 );
+
+  DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+  return f_ptr->va_head;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Head Virtual Address Without Updating Free Space
+ *
+ * Read the hardware head pointer of a DMA fifo and return the
+ * corresponding virtual address, without recomputing the fifo's free
+ * space.  The caller must ensure that update happens later, if needed.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval va_head  The virtual address of the head of the fifo
+ *
+ * \post The shadow va_head is recalculated from the current hardware head
+ *       and stored in the software fifo structure.
+ */
+__INLINE__ void * DMA_FifoGetHeadNoFreeSpaceUpdate( DMA_Fifo_t *f_ptr )
+{
+  unsigned int hw_head;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /* Same conversion as DMA_FifoGetHead(), but free_space is left stale. */
+  hw_head = f_ptr->fifo_hw_ptr->pa_head;
+
+  f_ptr->va_head = (char*)(f_ptr->va_start) +
+                   ( ( hw_head - f_ptr->pa_start ) << 4 );
+
+  return f_ptr->va_head;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Tail Virtual Address
+ *
+ * Read the hardware tail pointer of a DMA fifo and return the
+ * corresponding virtual address.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval va_tail  The virtual address of the tail of the fifo
+ *
+ * \post The shadow va_tail is recalculated from the current hardware tail
+ *       and the shadow free space is updated.
+ */
+__INLINE__ void * DMA_FifoGetTail( DMA_Fifo_t *f_ptr )
+{
+  unsigned int hw_tail;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /* Fetch the 4-bit-shifted tail from the DMA SRAM and convert it to a
+   * virtual address as an offset from va_start.
+   */
+  hw_tail = f_ptr->fifo_hw_ptr->pa_tail;
+
+  f_ptr->va_tail = (char*)(f_ptr->va_start) +
+                   ( ( hw_tail - f_ptr->pa_start ) << 4 );
+
+  DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+  return f_ptr->va_tail;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Tail Virtual Address Without Updating Free Space
+ *
+ * Read the hardware tail pointer of a DMA fifo and return the
+ * corresponding virtual address, without recomputing the fifo's free
+ * space.  The caller is responsible for invoking
+ * DMA_FifoUpdateFreeSpaceFromShadow() at a later time.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval va_tail  The virtual address of the tail of the fifo
+ *
+ * \post The shadow va_tail is recalculated from the current hardware tail
+ *       and stored in the software fifo structure.
+ */
+__INLINE__ void * DMA_FifoGetTailNoFreeSpaceUpdate( DMA_Fifo_t *f_ptr )
+{
+  unsigned int hw_tail;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  /* Same conversion as DMA_FifoGetTail(), but free_space is left stale. */
+  hw_tail = f_ptr->fifo_hw_ptr->pa_tail;
+
+  f_ptr->va_tail = (char*)(f_ptr->va_start) +
+                   ( ( hw_tail - f_ptr->pa_start ) << 4 );
+
+  return f_ptr->va_tail;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Tail Virtual Address from Shadow
+ *
+ * Return the shadow "tail" virtual address of a DMA fifo.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval va_tail  The virtual address of the tail of the fifo
+ *
+ * \post The shadow va_tail is returned as-is; the hardware tail is NOT
+ *       read and the shadow free space is NOT updated.
+ */
+__INLINE__ void * DMA_FifoGetTailFromShadow( DMA_Fifo_t *f_ptr )
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  return f_ptr->va_tail;
+}
+
+
+/*!
+ * \brief Get DMA Fifo End Virtual Address from the Shadow
+ *
+ * Return the shadow "end" virtual address of a DMA fifo.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval va_end  The virtual address of the end of the fifo
+ *
+ * \note WARNING: No DMA SRAM access is performed; the current shadow
+ *       va_end is returned as-is.  To actually issue a read to the DMA,
+ *       use DMA_FifoGetEndPa().
+ */
+__INLINE__ void * DMA_FifoGetEndFromShadow( DMA_Fifo_t *f_ptr )
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  return f_ptr->va_end;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Size
+ *
+ * Return the size of a DMA fifo, as recorded in the software fifo
+ * structure.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval size  The size of the DMA fifo, in units of 16B quads.
+ *
+ * \note WARNING: The size is the value computed at fifo initialization
+ *       time, not a fresh calculation from the DMA SRAM's current start
+ *       and end values.
+ */
+__INLINE__ unsigned int DMA_FifoGetSize( DMA_Fifo_t *f_ptr )
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+
+  return f_ptr->fifo_size;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Free Space With No Update Calculation
+ *
+ * Return the shadow free-space value of a DMA fifo without performing
+ * any recalculation.
+ *
+ * \param[in] f_ptr  Pointer to the software fifo structure
+ *
+ * \retval freeSpace  The amount of free space in the fifo, in units of
+ *                    16B quads.
+ */
+__INLINE__ unsigned int DMA_FifoGetFreeSpaceNoUpdateCalculation( DMA_Fifo_t *f_ptr )
+{
+  SPI_assert( f_ptr != NULL );
+
+  return f_ptr->free_space;
+}
+
+
+/*!
+ * \brief Get DMA Fifo Free Space
+ *
+ * Return a DMA fifo's amount of free space, optionally refreshing the
+ * head and/or tail shadows from the hardware fifo first.
+ *
+ * \param[in] f_ptr      Pointer to the software fifo structure
+ * \param[in] read_head  1 = read the hardware head before calculating,
+ *                       0 = use the current head shadow
+ * \param[in] read_tail  1 = read the hardware tail before calculating,
+ *                       0 = use the current tail shadow
+ *
+ * \retval freeSpace  The amount of free space in the fifo, in units of
+ *                    16B quads.
+ *
+ * \note If both read_head and read_tail are 0, the free space is
+ *       recomputed from the current shadow head and tail.
+ */
+__INLINE__ unsigned int DMA_FifoGetFreeSpace(
+					     DMA_Fifo_t   *f_ptr,
+					     unsigned int  read_head,
+					     unsigned int  read_tail
+					    )
+{
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+  SPI_assert( read_head == 1 || read_head == 0 );
+  SPI_assert( read_tail == 1 || read_tail == 0 );
+
+  /* Each hardware read below refreshes the free-space shadow as a side
+   * effect; only when neither is requested must it be recomputed
+   * explicitly from the shadows.
+   */
+  if ( read_head ) DMA_FifoGetHead( f_ptr );
+  if ( read_tail ) DMA_FifoGetTail( f_ptr );
+
+  if ( !read_head && !read_tail )
+    DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+
+  return f_ptr->free_space;
+}
+
+
+/*!
+ * \brief Set DMA Fifo Head
+ *
+ * Set a DMA fifo's "head" to the given virtual address.
+ *
+ * \param[in] f_ptr    Pointer to the software fifo structure
+ * \param[in] va_head  Virtual address of the head to be set
+ *
+ * \return None
+ *
+ * \post va_head is written to both the hardware and software fifo
+ *       structures, and the fifo free space is recalculated.
+ *
+ * \note Normally, for an injection fifo, the DMA manipulates the head,
+ *       but in optimized persistent communications the core can do it if
+ *       it is sure the fifo is empty at the time this is called.
+ */
+__INLINE__ void DMA_FifoSetHead(
+				DMA_Fifo_t *f_ptr,
+				void       *va_head
+			       )
+{
+  unsigned int new_pa_head;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+  SPI_assert( va_head >= f_ptr->va_start &&
+	      va_head <  f_ptr->va_end );
+
+  /* Convert the virtual head into a 4-bit-shifted physical address using
+   * the shadow pa_start/va_start pair, and write it to the DMA SRAM.
+   */
+  new_pa_head = f_ptr->pa_start +
+                ( ( (unsigned)va_head - (unsigned)(f_ptr->va_start) ) >> 4 );
+
+  f_ptr->fifo_hw_ptr->pa_head = new_pa_head;
+
+  /* Ensure the DMA SRAM write has been accepted by the memory system. */
+  _bgp_mbar();
+
+  /* Bring the shadows in line with what was just written. */
+  f_ptr->va_head = va_head;
+
+  DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+}
+
+
+/*!
+ * \brief Set DMA Fifo Tail
+ *
+ * Set a DMA fifo's "tail" to the given virtual address.
+ *
+ * \param[in] f_ptr    Pointer to the software fifo structure
+ * \param[in] va_tail  Virtual address of the tail to be set
+ *
+ * \return None
+ *
+ * \post va_tail is written to both the hardware and software fifo
+ *       structures, and the fifo free space is recalculated.
+ */
+__INLINE__ void DMA_FifoSetTail(
+				DMA_Fifo_t *f_ptr,
+				void       *va_tail
+			       )
+{
+  unsigned int new_pa_tail;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( f_ptr->fifo_hw_ptr != NULL );
+  SPI_assert( va_tail >= f_ptr->va_start &&
+	      va_tail <  f_ptr->va_end );
+
+  /* Convert the virtual tail into a 4-bit-shifted physical address using
+   * the shadow pa_start/va_start pair, and write it to the DMA SRAM.
+   */
+  new_pa_tail = f_ptr->pa_start +
+                ( ( (unsigned)va_tail - (unsigned)(f_ptr->va_start) ) >> 4 );
+
+  f_ptr->fifo_hw_ptr->pa_tail = new_pa_tail;
+
+  /* Ensure the DMA SRAM write has been accepted by the memory system. */
+  _bgp_mbar();
+
+  /* Bring the shadows in line with what was just written. */
+  f_ptr->va_tail = va_tail;
+
+  DMA_FifoUpdateFreeSpaceFromShadow( f_ptr );
+}
+
+
+
+
+/*
+ *------------------------------------------------------------------------------
+ * The following functions operate directly on the hardware fifo. Normally,
+ * users should use the software fifo routines (previously defined), but for
+ * bringup or diagnostics, it may be desirable to use these.
+ *------------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Start
+ *
+ * Write a DMA fifo's "start" directly into the hardware fifo structure.
+ *
+ * \param[in] fifo_hw_ptr  Pointer to the hardware fifo structure
+ * \param[in] pa_start     Physical address of the start to be set.
+ *                         16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note An MBAR is done after the write to ensure it has been accepted
+ *       by the memory system before other memory accesses occur.
+ */
+__INLINE__ void DMA_FifoSetStartPa(
+				   DMA_FifoHW_t *fifo_hw_ptr,
+				   unsigned int  pa_start
+				  )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  fifo_hw_ptr->pa_start = pa_start;
+  _bgp_mbar();
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Head
+ *
+ * Write a DMA fifo's "head" directly into the hardware fifo structure.
+ *
+ * \param[in] fifo_hw_ptr  Pointer to the hardware fifo structure
+ * \param[in] pa_head      Physical address of the head to be set.
+ *                         16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note An MBAR is done after the write to ensure it has been accepted
+ *       by the memory system before other memory accesses occur.
+ */
+__INLINE__ void DMA_FifoSetHeadPa(
+				  DMA_FifoHW_t *fifo_hw_ptr,
+				  unsigned int  pa_head
+				 )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  fifo_hw_ptr->pa_head = pa_head;
+  _bgp_mbar();
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo Tail
+ *
+ * Write a DMA fifo's "tail" directly into the hardware fifo structure.
+ *
+ * \param[in] fifo_hw_ptr  Pointer to the hardware fifo structure
+ * \param[in] pa_tail      Physical address of the tail to be set.
+ *                         16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note An MBAR is done after the write to ensure it has been accepted
+ *       by the memory system before other memory accesses occur.
+ */
+__INLINE__ void DMA_FifoSetTailPa(
+				  DMA_FifoHW_t *fifo_hw_ptr,
+				  unsigned int  pa_tail
+				 )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  fifo_hw_ptr->pa_tail = pa_tail;
+  _bgp_mbar();
+}
+
+
+/*!
+ * \brief Set DMA Hardware Fifo End
+ *
+ * Write a DMA fifo's "end" directly into the hardware fifo structure.
+ *
+ * \param[in] fifo_hw_ptr  Pointer to the hardware fifo structure
+ * \param[in] pa_end       Physical address of the end to be set.
+ *                         16B-aligned 4-bit shifted physical address.
+ *
+ * \return None
+ *
+ * \note An MBAR is done after the write to ensure it has been accepted
+ *       by the memory system before other memory accesses occur.
+ */
+__INLINE__ void DMA_FifoSetEndPa(
+				 DMA_FifoHW_t *fifo_hw_ptr,
+				 unsigned int  pa_end
+				)
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  fifo_hw_ptr->pa_end = pa_end;
+  _bgp_mbar();
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo Start
+ *
+ * Read a DMA fifo's "start" directly from the hardware fifo structure.
+ *
+ * \param[in] fifo_hw_ptr  Pointer to the hardware fifo structure
+ *
+ * \retval pa_start  Physical address of the fifo start.
+ *                   16B-aligned 4-bit shifted physical address.
+ */
+__INLINE__ unsigned int DMA_FifoGetStartPa( DMA_FifoHW_t *fifo_hw_ptr )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  return fifo_hw_ptr->pa_start;
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo Head
+ *
+ * Read a DMA fifo's "head" directly from the hardware fifo structure.
+ *
+ * \param[in] fifo_hw_ptr  Pointer to the hardware fifo structure
+ *
+ * \retval pa_head  Physical address of the fifo head.
+ *                  16B-aligned 4-bit shifted physical address.
+ */
+__INLINE__ unsigned int DMA_FifoGetHeadPa( DMA_FifoHW_t *fifo_hw_ptr )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  return fifo_hw_ptr->pa_head;
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo Tail
+ *
+ * Read a DMA fifo's "tail" directly from the hardware fifo structure.
+ *
+ * \param[in] fifo_hw_ptr  Pointer to the hardware fifo structure
+ *
+ * \retval pa_tail  Physical address of the fifo tail.
+ *                  16B-aligned 4-bit shifted physical address.
+ */
+__INLINE__ unsigned int DMA_FifoGetTailPa( DMA_FifoHW_t *fifo_hw_ptr )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  return fifo_hw_ptr->pa_tail;
+}
+
+
+/*!
+ * \brief Get DMA Hardware Fifo End
+ *
+ * Read a DMA fifo's "end" directly from the hardware fifo structure.
+ *
+ * \param[in] fifo_hw_ptr  Pointer to the hardware fifo structure
+ *
+ * \retval pa_end  Physical address of the fifo end.
+ *                 16B-aligned 4-bit shifted physical address.
+ */
+__INLINE__ unsigned int DMA_FifoGetEndPa( DMA_FifoHW_t *fifo_hw_ptr )
+{
+  SPI_assert( fifo_hw_ptr != NULL );
+
+  return fifo_hw_ptr->pa_end;
+}
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_InjFifo.h b/arch/powerpc/include/spi/DMA_InjFifo.h
new file mode 100644
index 00000000000000..777a21cceaa178
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_InjFifo.h
@@ -0,0 +1,2475 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef _DMA_INJFIFO_H_ /* Prevent multiple inclusion */
+#define _DMA_INJFIFO_H_
+
+
+/*!
+ * \file spi/DMA_InjFifo.h
+ *
+ * \brief DMA SPI Injection Fifo Definitions and Inline Functions
+ *
+ * This include file contains inline functions that are used to interface with
+ * BG/P DMA injection fifos at the lowest level.
+ * Functions include
+ * - initialize
+ * - get fifo start, head, tail, end, size, free space, descriptor count
+ * - set fifo head, tail, start PA, head PA, tail PA, end PA
+ * - increment tail
+ * - inject descriptor(s)
+ * - query status: not empty, available, threshold crossed, activated,
+ * descriptor done.
+ * - set status: clear threshold crossed, activate, deactivate
+ *
+ * Data structures are defined to manipulate the injection fifos:
+ * - An injection fifo group structure defining a group of injection fifos
+ * - Within the group are injection fifo structures
+ * - Within each injection fifo structure is a software fifo structure
+ * - Each software fifo structure points to its corresponding hardware
+ * fifo structure in the DMA SRAM
+ *
+ * \verbatim Picture of data structures:
+
+ ========DDR MEMORY===================|==========DMA SRAM MEMORY==========
+ ------------------------------ |
+ | DMA_InjFifoGroup_t | |
+ | | | -----------------------------
+ | status --------------------|-------|---->| DMA_InjFifoStatus_t |
+ | fifo[0..31] | | -----------------------------
+ | ------------------------ | |
+ | | DMA_InjFifo_t | | |
+ | | | | |
+ | 0 | ------------------- | | | -----------------------------
+ | | | DMA_Fifo_t |-|-|-------|---->| DMA_FifoHW_t |
+ | | ------------------- | | | -----------------------------
+ | ------------------------ | |
+ | . | |
+ | . | |
+ | . | |
+ | ------------------------ | |
+ | | DMA_InjFifo_t | | |
+ | | | | |
+ |31 | ------------------- | | | -----------------------------
+ | | | DMA_Fifo_t |-|-|-------|---->| DMA_FifoHW_t |
+ | | ------------------- | | | -----------------------------
+ | ------------------------ | |
+ ------------------------------ |
+
+ \endverbatim
+ *
+ * Definitions:
+ * - A fifo represents a contiguous block of DDR memory
+ * - A fifo has a starting address and an ending address (defines the memory
+ * block)
+ * - An injection fifo is a series of 32-byte descriptors. There is a count
+ * of the number of descriptors ever injected into this fifo. It will never
+ * wrap in the expected lifetime of a job.
+ * - Injection consists of copying a 32-byte descriptor into the next available
+ * slot (pointed to by the tail), incrementing the tail pointer, and
+ * incrementing the descriptor count for the fifo.
+ * - The DMA engine asynchronously processes descriptors, beginning with the
+ * descriptor pointed to by head, and ending with the descriptor just prior
+ * to tail.
+ * - There are injection (DMA InjFifo) and reception (DMA RecFifo) fifos
+ * (separate interfaces)
+ * - There are DMA_NUM_INJ_FIFO_GROUPS injection fifo groups
+ * - There are DMA_NUM_INJ_FIFOS_PER_GROUP injection fifos per group
+ * - Thus, there are DMA_NUM_INJ_FIFOS injection fifos per node
+ * - There are DMA_NUM_REC_FIFO_GROUPS reception fifo groups
+ * - There are DMA_NUM_REC_FIFOS_PER_GROUP reception fifos per group
+ * - Thus, there are DMA_NUM_REC_FIFOS reception fifos per node
+ * - A "shadow" refers to a copy of the elements of the fifo (start, end, head,
+ * tail) that is maintained by these inline functions. The shadows may be
+ * used to calculate other values such as free space. The shadows are updated
+ * by these inlines whenever the hardware fifo is read or written.
+ *
+ * \note These functions do not try to detect things that software shouldn't do,
+ * like injecting a descriptor into a remote_get fifo, since the hardware
+ * doesn't distinguish between remote get fifos and normal injection
+ * fifos. That sort of checking should be done in a higher level.
+ *
+ * \note Memory consistency/coherency inside these inlines is achieved using
+ * mbar and msync.
+ *
+ * MBAR is used to make sure that all writes to memory issued by the
+ * calling core have been accepted by the memory system before
+ * continuing. This guarantees that writes and reads to/from different
+ * addresses go in a defined order.
+ *
+ * MBAR EXAMPLE 1: When a store is done to DMA SRAM, it may not complete
+ * for a period of time. If a counter value is set, and then an injection
+ * fifo tail pointer is set, DMA may see the tail pointer update and begin
+ * the operation before the counter value has been set. Inserting an mbar
+ * between the setting of the counter and the setting of the tail pointer
+ * guarantees that the counter will be set before the tail pointer is
+ * updated.
+ *
+ * MBAR EXAMPLE 2: A counter hits zero. We process the hit-zero and write
+ * a "clear hit zero" to DMA SRAM, and then go read that counter's hit-zero
+ * status (different address). The hit-zero status will still indicate
+ * that it hit zero, even though we have already processed it, unless an
+ * mbar is inserted between clearing the hit-zero and reading the hit-zero
+ * status.
+ *
+ * MBAR PHILOSOPHY: After DMA SRAM is updated in the DMA inline functions,
+ * they always do at least an mbar (possibly an msync instead...see below).
+ *
+ * MSYNC does what mbar does, plus ensures consistency across cores. That
+ * is, it waits for snoops (invalidations of L1 cache) on the other cores
+ * to complete before continuing. This guarantees that all of the cores
+ * will see a consistent view of memory after the msync.
+ *
+ * MSYNC EXAMPLE: When a reception counter has hit zero, we assume the
+ * DMA'd data is available to be read by any core. However, old copies of
+ * that data may still be in the L1 caches. Inserting an msync after
+ * detecting that a counter has hit zero guarantees that the old data has
+ * been removed from the L1 caches.
+ *
+ * MSYNC PHILOSOPHY: After the inline functions detect that a counter has
+ * hit zero, they always do an msync.
+ *
+ * SPECULATIVE EXECUTION OF MSYNC: There are cases where msync is done
+ * conditionally. The CPU will begin execution of both sides of the
+ * condition before the result of the condition has been determined.
+ * Then, it will cancel the execution of one side once the result of the
+ * condition has been determined. This speculation is unwanted when
+ * the first instruction on one side of the condition is msync because
+ * cancelling an msync is similar to executing the complete msync.
+ * To avoid this speculative execution of msync, we call
+ * _bgp_msync_nonspeculative(). This will trick the CPU so it won't begin
+ * the msync until the result of the condition is known.
+ *
+ * CALLER ADVICE: Users of these functions should not need to do
+ * mbar/msync themselves, unless they are doing something like the
+ * following: Read a counter and operate on the result when the counter
+ * hasn't reached zero. The caller will need to perform an msync after
+ * reading the counter in order to ensure that snoops have completed
+ * on all CPUs before operating on the DMA'd data.
+ *
+ * \note General discussion on injection fifo interrupts. Both the warning
+ * threshold crossed and full fifo interrupts...
+ *
+ * For remote gets, a fifo is considered available if it has at least 512 bytes
+ * free (32 16B quads). An arriving remote get can be written if there are 512
+ * bytes free, but after that the available goes low and no further remote gets
+ * can be written to any fifo. Furthermore, if any injection fifo has less than
+ * 512 bytes free, the fifo becomes unavailable and any arriving remote get
+ * packet will cause an interrupt to fire and the rDMA will stop.
+ *
+ * Specifically, if an injection fifo has less than 512 B (by either injecting
+ * or remote gets) the iDMA will continue to operate and the rDMA will continue
+ * to operate until any remote get packet arrives to any fifo, at which point
+ * an interrupt fires and the rDMA stops.
+ *
+ * Note that these interrupts were put in for warnings of remote get fifos
+ * becoming nearly full. However the time between when the warning fires and the
+ * condition is cleared may be long, reconfiguring an almost full remote get
+ * fifo is difficult, and recovery from full remote get injection fifos is very
+ * difficult. Since software can prevent this, and since recovery is so
+ * difficult, we consider injection fifo threshold crossing interrupts and
+ * injection fifo full interrupts to be fatal. Thus there is no handler function
+ * in the injection fifo allocation routine.
+ *
+ * So software needs to manage injection and remote get fifo space so that there
+ * are always at least 512 bytes of free space in every fifo. To accomplish
+ * this, software needs to guarantee it won't inject descriptors if doing so
+ * would trigger an interrupt or make the fifo unavailable.
+ *
+ * This can be done by setting the interrupt threshold to 0 (interrupt fires if
+ * free space <= threshold), and not injecting if after injection there are
+ * less than DMA_MIN_INJECT_SIZE_IN_QUADS (=32) slots. Furthermore, remote
+ * get space should not be allocated if doing so might result in strictly less
+ * than DMA_MIN_INJECT_SIZE_IN_QUADS slots.
+ *
+ */
+
+
+
+#include <common/namespace.h>
+/* #include <memory.h> */
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ * placed in the translation unit (eg. xxx.c)for use within that unit.
+ * The function is not externalized for use by another unit...we want this
+ * so we don't end up with multiple units exporting the same function,
+ * which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+ \verbatim
+ #ifndef INLINE
+ # define INLINE extern inline
+ #endif
+ INLINE int max(int a, int b) {
+ return a > b ? a : b;
+ }
+ \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+ \verbatim
+ #define INLINE
+ #include "header.h"
+ \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+#include <spi/DMA_Assert.h>
+#include <spi/DMA_Fifo.h>
+#include <spi/DMA_Descriptors.h>
+
+/*!
+ * \brief Select integer-register or FPU copy for descriptor injection.
+ *
+ * You can save a few cycles by using the parallel floating point unit to
+ * do the 'memcpy' as part of injecting a descriptor into a FIFO; but you
+ * then need to quadword-align the source memory and you may need to
+ * save/restore the FP context.  Setting k_use_fp_to_inject to 0 arranges
+ * for the generated code to use integer registers for the 'memcpy'.
+ */
+enum {
+ k_use_fp_to_inject = 0
+};
+
+
+/*!
+ * \brief Number of Injection Fifo Groups
+ */
+#define DMA_NUM_INJ_FIFO_GROUPS 4
+
+
+/*!
+ * \brief Number of Injection Fifos per Group
+ */
+#define DMA_NUM_INJ_FIFOS_PER_GROUP 32
+
+
+/*!
+ * \brief Number of Injection Fifos (total)
+ */
+#define DMA_NUM_INJ_FIFOS (DMA_NUM_INJ_FIFO_GROUPS*DMA_NUM_INJ_FIFOS_PER_GROUP)
+
+
+/*!
+ * \brief Minimum Free Space Required After Injection
+ *
+ * This is the number of 16-byte quads that need to be free in a fifo after
+ * injection of a descriptor.
+ *
+ * \note 32 quads = 512 bytes, matching the remote-get availability
+ *       threshold discussed in the file-header notes above.
+ */
+#define DMA_MIN_INJECT_SIZE_IN_QUADS 32
+
+
+/*!
+ * \brief Number of 16-byte quads in a fifo descriptor
+ *
+ */
+#define DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS 2
+
+
+/*!
+ * \brief Number of bytes in a fifo descriptor
+ *
+ * \note The expansion is parenthesized so the macro is safe inside larger
+ *       expressions: without parentheses,
+ *       x / DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES would expand to x / 2*16,
+ *       i.e. (x/2)*16, which is wrong.
+ */
+#define DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS*16)
+
+
+/*!
+ * \brief Minimum size of a fifo, somewhat arbitrary
+ *
+ * \note 1024 bytes.  NOTE(review): chosen by convention rather than a
+ *       hardware limit, per the "somewhat arbitrary" remark.
+ */
+#define DMA_MIN_INJ_FIFO_SIZE_IN_BYTES (256*4)
+
+
+/*!
+ * \brief Injection DMA Fifo Structure
+ *
+ * This structure contains a software DMA fifo structure (defined in DMA_Fifo.h)
+ * and other fields that are specific to an injection fifo used by software.
+ * The priority, local, and ts_inj_map fields are software copies of fifo
+ * configuration that is reflected in the DMA DCRs noted below.
+ *
+ * \todo Some more careful thought should be given how to group these so as to
+ * get best memory system performance.
+ * eg. Probably want to ALIGN_L3_CACHE the fifo_hw_ptr.
+ *
+ */
+typedef struct DMA_InjFifo_t
+{
+ DMA_Fifo_t dma_fifo; /*!< Common software fifo structure */
+ unsigned short int fifo_id; /*!< The fifo identifier (0 to
+ DMA_NUM_INJ_FIFOS_PER_GROUP-1). */
+
+ unsigned long long desc_count; /*!< The number of descriptors that have
+ ever been injected into this fifo.
+ Monotonically increasing; used by
+ DMA_InjFifoIsDescriptorDone(). */
+
+ unsigned int occupiedSize; /*!< The number of 16B quads in the fifo that
+ are logically occupied (reserved via
+ DMA_InjFifoReserveDescriptorStorage()).
+ This does not
+ include the DMA_MIN_INJECT_SIZE_IN_QUADS
+ that always remains logically occupied. */
+ /*!
+ * \note The following fields contain info about the fifo that affects the
+ * DCR values configuring the fifo.
+ */
+ unsigned short int priority; /*!< 0 = Normal priority, 1 = High priority.
+ The DMA uses this to determine which
+ injection fifo to serve next.
+ Reflected in DCR addresses
+ _BGP_DCR_iDMA_FIFO_PRIORITY(i), where i
+ is the group_id. 0xD32 - 0xD35.
+ Fifo j is high priority if bit j in the
+ DCR is 1, otherwise it is normal
+ priority. */
+
+ unsigned short int local; /*!< 0 = non-local, 1 = local.
+ If 0, this fifo uses the torus and
+ ts_inj_map must be non-zero.
+ If 1, this fifo is used for transfers
+ local to the node only.
+ Reflected in DCR addresses
+ _BGP_DCR_iDMA_LOCAL_COPY(i), where i
+ is the group_id. 0xD5C - 0xD5F.
+ Fifo j is for local transfers if bit j
+ in the DCR is 1, otherwise it is for
+ torus transfers. */
+
+ unsigned char ts_inj_map; /*!< 8 bit vector mask indicating which torus
+ fifos can be used by this DMA fifo.
+ Reflected in DCR addresses
+ _BGP_DCR_iDMA_TS_INJ_FIFO_MAP(k) where k
+ is the fifo_id. 0xD3C - 0xD5B.
+ Fifo k can inject in torus fifo j if
+ bit j of the k'th DCR byte is 1. */
+}
+DMA_InjFifo_t;
+
+
+/*!
+ * \brief DMA Injection Fifo Status structure
+ *
+ * This structure maps the DMA SRAM for a particular group of
+ * DMA_NUM_INJ_FIFOS_PER_GROUP fifos. All members are volatile because they
+ * alias memory-mapped hardware state; the reserved_* members are holes in
+ * the hardware address map and must not be accessed.
+ *
+ */
+typedef struct DMA_InjFifoStatus_t
+{
+ volatile unsigned not_empty; /*!< R bitmask, 1 bit/fifo:
+ Injection FIFO not empty. */
+
+ volatile unsigned reserved_0; /*!< HOLE */
+
+ volatile unsigned available; /*!< R bitmask, 1 bit/fifo:
+ Injection FIFO available. */
+
+ volatile unsigned reserved_1; /*!< HOLE */
+
+ volatile unsigned threshold_crossed; /*!< R bitmask, 1 bit/fifo:
+ Threshold crossed. */
+
+ volatile unsigned reserved_2; /*!< HOLE */
+
+ volatile unsigned clear_threshold_crossed;/*!< W bitmask, 1 bit/fifo:
+ Clear threshold crossed. */
+
+ volatile unsigned reserved_3; /*!< HOLE */
+
+ volatile unsigned activated; /*!< R bitmask, 1 bit/fifo:
+ Retrieve activated fifos. */
+
+ volatile unsigned activate; /*!< W bitmask, 1 bit/fifo:
+ Set "1" to activate fifo. */
+
+ volatile unsigned deactivate; /*!< W bitmask, 1 bit/fifo:
+ Set "1" to deactivate fifo*/
+}
+DMA_InjFifoStatus_t;
+
+
+/*!
+ * \brief DMA Injection Fifo Group Structure
+ *
+ * This structure defines a DMA InjFifo Group. It points to a
+ * DMA InjFifo Status structure, and contains DMA_NUM_INJ_FIFOS_PER_GROUP
+ * DMA InjFifo structures.
+ *
+ * It is passed into the DMA_InjFifoGroupAllocate system call.
+ * The system call sets up the requested fifos, and fills in this fifo group
+ * structure, including the appropriate DMA InjFifo structures within it.
+ *
+ * It also contains permission bits to use the fifos, one bit per fifo.
+ * When the permission bit is on, the corresponding fifo belongs to this
+ * group and can be used. Otherwise, the fifo should not be used as part
+ * of this group. These permission bits are used as follows:
+ * 1. Inline functions will ASSERT when an attempt is made
+ * to use a fifo that is not part of this group.
+ * 2. Inline functions will use the permission bits as a mask
+ * to return status information only for fifos that are allocated
+ * to this group.
+ *
+ */
+typedef struct DMA_InjFifoGroup_t
+{
+ DMA_InjFifoStatus_t *status_ptr; /*!< Pointer to fifo status. */
+
+ DMA_InjFifo_t fifos[DMA_NUM_INJ_FIFOS_PER_GROUP];/*!< Array
+ of fifo structures. The i-th struct
+ is defined and usable only if
+ bit i of permissions = 1. */
+
+ unsigned int permissions; /*!< Permissions bit vector. Bit i is 1
+ if permitted to use fifo i. The fifo
+ is allocated to this group. */
+
+ unsigned int group_id; /*!< The id of this group (0 to
+ DMA_NUM_INJ_FIFO_GROUPS-1). */
+}
+DMA_InjFifoGroup_t;
+
+
+/*!
+ * \brief Remote Get Fifo Full Handler Function Prototype
+ *
+ * A function with this signature receives control when one or more remote
+ * get fifos have filled. This function should do the following to help
+ * make space in the fifo(s):
+ * 1. Determine if there are any remote get fifos full or nearly full.
+ * 2. For each such fifo:
+ * 1. Allocate a larger fifo
+ * 2. Copy the descriptors from the old fifo to the new fifo
+ * 3. Call DMA_InjFifoInitById() to register the new fifo with the DMA
+ * 4. Call DMA_InjFifoSetTailById() to set the new fifo's tail pointer
+ * 5. Free the old fifo
+ *
+ * A function of this type can be registered on DMA_InjFifoGroupAllocate().
+ *
+ * \param[in] fg_ptr Pointer to the fifo group associated with this fifo.
+ * \param[in] f_num The fifo number that has filled. This is
+ * relative to the DMA fifo group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] handler_parm An opaque pointer provided by the caller who
+ * registered this handler.
+ */
+typedef void (*DMA_InjFifoRgetFifoFullHandler_t)(
+ DMA_InjFifoGroup_t *fg_ptr,
+ int f_num,
+ void *handler_parm
+ );
+
+
+/*!
+ * \brief Remote Get Fifo Full Handler Table Entry
+ *
+ * This defines an entry in the Remote Get Fifo Full Handler Table.
+ * It identifies the fifo group pointer associated with the full fifo,
+ * and the pointer to the handler function to receive control to handle
+ * the fifo full condition and the opaque pointer to be passed to the
+ * handler function when it is called. The core number of the core that
+ * will process the condition is associated with each entry.
+ * Entries are filled in by DMA_InjFifoGroupAllocate() when a handler is
+ * registered.
+ */
+typedef struct DMA_InjFifoRgetFifoFullHandlerEntry_t
+{
+ DMA_InjFifoGroup_t *fg_ptr; /*!< Pointer to injection fifo group */
+ DMA_InjFifoRgetFifoFullHandler_t handler; /*!< Pointer to handler function */
+ void *handler_parm; /*!< Pointer to be passed to
+ the handler. */
+ uint32_t core_num;/*!< Core number of the core that
+ will process the condition. */
+} DMA_InjFifoRgetFifoFullHandlerEntry_t;
+
+
+/*!
+ *
+ * \brief Remote Get Fifo Full Handler Table
+ *
+ * An array of entries, one per injection fifo (indexed by the global fifo
+ * number: group * DMA_NUM_INJ_FIFOS_PER_GROUP + fifo_id). Each entry
+ * specifies the fifo
+ * group structure and the handler function that will receive control to
+ * handle a remote get fifo full condition for fifos in that fifo group.
+ */
+extern DMA_InjFifoRgetFifoFullHandlerEntry_t DMA_RgetFifoFullHandlerTable[DMA_NUM_INJ_FIFOS];
+
+
+/*!
+ * \brief Remote Get Fifo Full Init Has Been Done Indicator
+ *
+ * 0 means the initialization has not been done.
+ * 1 means the initialization has been done.
+ * Tested and set by DMA_InjFifoGroupAllocate().
+ */
+extern int DMA_InjFifoRgetFifoFullInitHasBeenDone;
+
+
+/*!
+ * \brief Remote Get Fifo Full Initialization
+ *
+ * Initialize data structures and interrupt handlers to handle a remote get
+ * fifo full condition.
+ *
+ * \param[in] interruptGroup The handle that identifies the remote get fifo
+ * full interrupts (only one interrupt, in this
+ * case, group 3, irq 24).
+ * \param[in] rget_barrier A function pointer to a function that implements
+ * the barrier that is used by the handler function
+ * to synchronize all cores in the node as they
+ * each handle the interrupt (it is a broadcasted
+ * interrupt).
+ * \param[in] rget_barrier_arg The generic arg to pass to the barrier function.
+ */
+void DMA_InjFifoRgetFifoFullInit( Kernel_InterruptGroup_t interruptGroup,
+ void (*rget_barrier)(void *),
+ void *rget_barrier_arg );
+
+
+/*!
+ * \brief Query Free DMA InjFifos within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available to be allocated) fifos within the specified group.
+ *
+ * \param[in] grp Group number being queried
+ * (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[out] num_fifos Pointer to an int where the number of free
+ * fifos in the specified group is returned
+ * \param[out] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of free fifos is returned.
+ * Each int is the fifo number
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * The caller must provide space for
+ * DMA_NUM_INJ_FIFOS_PER_GROUP ints,
+ * in case the entire fifo group is free.
+ *
+ * \retval 0 Successful. num_fifos and fifo_ids array set as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ */
+__INLINE__ int DMA_InjFifoGroupQueryFree( int grp,
+ int *num_fifos,
+ int *fifo_ids )
+{
+ /* Hand straight through to the kernel; the syscall takes unsigned views
+ * of the caller's int buffers.
+ */
+ int rc = Kernel_InjFifoGroupQueryFree( grp,
+ (uint32_t*)num_fifos,
+ (uint32_t*)fifo_ids );
+ return rc;
+}
+
+
+/*!
+ * \brief Allocate DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that allocates specified
+ * DMA injection fifos from the specified group. Parameters specify whether
+ * each fifo is high or normal priority, local or non-local, and which torus
+ * fifos it maps to. A DMA_InjFifoGroup_t structure is returned for
+ * use in other inline functions to operate on the allocated fifos.
+ *
+ * Refer to the interrupt discussion at the top of this include file to see why
+ * there are no interrupt-related parameters.
+ *
+ * \param[in] grp Group number whose DMA injection fifos are being
+ * allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in] num_fifos Number of fifos to be allocated from the group
+ * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of fifos to be allocated is provided.
+ * Each int is the fifo number
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] priorities Pointer to an array of num_fifos short ints where
+ * the list of priorities to be assigned to the fifos
+ * is provided. Each short int indicates the priority
+ * to be assigned to each of the fifos identified in
+ * the fifo_ids array (0 is normal, 1 is high priority).
+ * \param[in] locals Pointer to an array of num_fifos short ints where
+ * an indication is provided of whether each fifo will
+ * be used for local transfers (within the same node)
+ * or torus transfers. Each short int indicates the
+ * local/non-local attribute to be assigned to each of
+ * the fifos identified in the fifo_ids array (0 is
+ * non-local, 1 is local). If 0, the corresponding
+ * array element in ts_inj_maps indicates which torus
+ * fifos can be injected.
+ * \param[in] ts_inj_maps Pointer to an array of num_fifos chars where
+ * the torus fifos that can be injected are specified
+ * for each fifo. Each char specifies which of
+ * the 8 torus injection fifos can be injected when a
+ * descriptor is injected into the DMA injection fifo.
+ * Must be non-zero when the corresponding "locals"
+ * is 0.
+ * Bits 0-3 are for torus group 0.
+ * Bits 4-7 are for torus group 1.
+ * Bits 3 and 7 are the high priority fifos.
+ * \param[in] rget_handler Pointer to a function with prototype
+ * DMA_InjFifoRgetFifoFullHandler_t that will handle
+ * a remote get fifo full condition for fifos in this
+ * fifo group. If NULL is specified, the condition
+ * will not be handled.
+ * \param[in] rget_handler_parm A pointer to opaque storage that will be
+ * passed to the rget_handler.
+ * \param[in] rget_interruptGroup A InterruptGroup_t that identifies the
+ * group of interrupts that handle the remote get
+ * fifo full condition. It is only one interrupt:
+ * group 3, irq 24.
+ * \param[in] rget_barrier Function pointer to a function that implements
+ * a barrier that is used by the rget fifo full
+ * interrupt handler. This barrier should be across
+ * all cores of all active processes on this compute node.
+ * \param[in] rget_barrier_arg Generic arg to pass to barrier function.
+ * \param[out] fg_ptr Pointer to a structure that is filled in upon
+ * successful return for use in other inline functions
+ * to operate on the allocated fifos.
+ * \li fifos - Array of fifo structures. Structures
+ * for allocated fifos are initialized as
+ * documented below. Structures for
+ * fifos not allocated by this instance of
+ * this syscall are initialized to binary
+ * zeros. Allocated fifos are enabled.
+ * \li status_ptr - Points to status area within the
+ * DMA memory map.
+ * \li permissions - Bits indicating which fifos were
+ * allocated during this syscall.
+ * \li group_id - The id of this group.
+ *
+ * \retval 0 Successful. Fifos allocated and fg_ptr structure filled in as
+ * described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \return The group fifo structure pointed to by fg_ptr is completely
+ * initialized as follows:
+ * - status_ptr points to the appropriate fifo group DMA memory map
+ * - fifo structures array. Fifo structures for fifos not allocated
+ * during this syscall are initialized to binary zeros. Fifo
+ * structures for fifos allocated during this syscall are initialized:
+ * - fifo_hw_ptr points to the DMA memory map for this fifo. The
+ * hardware start, end, head, and tail are set to zero by the
+ * kernel.
+ * - All other fields in the structure are set to zero by the kernel
+ * except priority, local, and ts_inj_map are set to reflect what
+ * was requested in the priorities, locals, and ts_inj_maps
+ * syscall parameters.
+ *
+ */
+__INLINE__ int DMA_InjFifoGroupAllocate(
+ int grp,
+ int num_fifos,
+ int *fifo_ids,
+ unsigned short int *priorities,
+ unsigned short int *locals,
+ unsigned char *ts_inj_maps,
+ DMA_InjFifoRgetFifoFullHandler_t rget_handler,
+ void *rget_handler_parm,
+ Kernel_InterruptGroup_t rget_interruptGroup,
+ void (*rget_barrier)(void *),
+ void *rget_barrier_arg,
+ DMA_InjFifoGroup_t *fg_ptr
+ )
+{
+ int rc;
+ int i, global_fifo_id;
+
+ /* The kernel performs the actual allocation and fills in *fg_ptr. */
+ rc = Kernel_InjFifoGroupAllocate( grp,
+ num_fifos,
+ (uint32_t*)fifo_ids,
+ (uint16_t*)priorities,
+ (uint16_t*)locals,
+ (uint8_t*)ts_inj_maps,
+ (uint32_t*)fg_ptr);
+
+ if ( rc == 0 )
+ {
+ /*
+ * If a remote get fifo full handler has been provided, update the table
+ * to indicate that this handler will handle full conditions on the fifos
+ * just allocated.
+ */
+ if ( rget_handler )
+ {
+ /*
+ * If rget handler init has not been done, do it:
+ */
+ if ( DMA_InjFifoRgetFifoFullInitHasBeenDone == 0 )
+ DMA_InjFifoRgetFifoFullInit( rget_interruptGroup,
+ rget_barrier,
+ rget_barrier_arg );
+
+ /* Register the handler for each allocated fifo, keyed by the global
+ * fifo number (group * fifos-per-group + fifo id).
+ */
+ for (i=0; i<num_fifos; i++)
+ {
+ global_fifo_id = (grp * DMA_NUM_INJ_FIFOS_PER_GROUP) + fifo_ids[i];
+ DMA_RgetFifoFullHandlerTable[global_fifo_id].fg_ptr = fg_ptr;
+ DMA_RgetFifoFullHandlerTable[global_fifo_id].handler = rget_handler;
+ DMA_RgetFifoFullHandlerTable[global_fifo_id].handler_parm =
+ rget_handler_parm;
+ DMA_RgetFifoFullHandlerTable[global_fifo_id].core_num=
+ Kernel_PhysicalProcessorID();
+ }
+
+ /*
+ * Indicate done with initialization.
+ * NOTE(review): DMA_InjFifoRgetFifoFullInit() may already set this
+ * flag itself — confirm; setting it again here is harmless.
+ */
+ DMA_InjFifoRgetFifoFullInitHasBeenDone = 1;
+ }
+ }
+
+ return(rc);
+}
+
+
+/*!
+ * \brief Free DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA injection
+ * fifos from the specified group.
+ *
+ * \param[in] grp Group number whose DMA injection fifos are being
+ * freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in] num_fifos Number of fifos to be freed from the group
+ * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of fifos to be freed is provided.
+ * Each int is the fifo number
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] fg_ptr Pointer to the structure previously filled in when
+ * these fifos were allocated. Upon successful
+ * return, this structure is updated to reflect the
+ * freed fifos:
+ * \li fifos - Structures for freed fifos zero'd.
+ * Freed fifos are disabled.
+ * \li permissions - Bits cleared for each freed fifo.
+ *
+ * \retval 0 Successful. Fifos freed and fg_ptr structure updated as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \note This is a fatal error if any of the fifos are non empty and activated
+ *
+ */
+__INLINE__ int DMA_InjFifoGroupFree(
+ int grp,
+ int num_fifos,
+ int *fifo_ids,
+ DMA_InjFifoGroup_t *fg_ptr
+ )
+{
+ return Kernel_InjFifoGroupFree( grp,
+ num_fifos,
+ (uint32_t*)fifo_ids,
+ (uint32_t*)fg_ptr);
+}
+
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Calls to access the Fifo, given a pointer to the injection fifo structure
+ * -----------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Set DMA Injection Fifo Head
+ *
+ * Set a DMA injection fifo's "head", given an injection fifo structure.
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ * \param[in] va_head Virtual address of the head to be set
+ *
+ * \return None
+ *
+ * \post va_head is set in both the hardware and software fifo structures,
+ * and the fifo free space is recalculated.
+ *
+ * \note Normally, for an injection fifo, the DMA manipulates the head, but in
+ * optimized persistent communications the core can do it if it is sure
+ * the fifo is empty at the time this is called.
+ */
+__INLINE__ void DMA_InjFifoSetHead( DMA_InjFifo_t *f_ptr,
+ void *va_head )
+{
+ SPI_assert( f_ptr != NULL );
+
+ /* Delegate to the common fifo code, which updates hardware and shadow. */
+ DMA_FifoSetHead( &f_ptr->dma_fifo, va_head );
+}
+
+
+/*!
+ * \brief Increment DMA Injection Fifo Tail
+ *
+ * Increment a DMA injection fifo's "tail", given an injection fifo structure.
+ * Handles wrapping of the tail past the end of the fifo.
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ * \param[in] incr The number of quads (16 byte units) to increment the
+ * tail pointer by. This value must be even (ie. descriptors
+ * are 32 bytes).
+ *
+ * \retval None
+ *
+ * \post va_tail is set in both the hardware and software fifo structures,
+ * the fifo free space is recalculated, and the fifo's descriptor count
+ * is incremented according to the incr.
+ *
+ * \note This function does not check if there is free space in the fifo
+ * for this many quads. It must be preceded by a check of the
+ * free space.
+ */
+__INLINE__ void DMA_InjFifoIncrementTail(
+ DMA_InjFifo_t *f_ptr,
+ unsigned int incr
+ )
+{
+ SPI_assert( f_ptr != NULL );
+ SPI_assert( (incr & 0x1) == 0 );
+
+ {
+ void *va_tail = DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo );
+
+ void *va_end = DMA_FifoGetEndFromShadow( &f_ptr->dma_fifo );
+
+ /* Convert quads to bytes (1 quad = 16 bytes). */
+ unsigned int incr_bytes = incr << 4;
+
+ unsigned int bytes_to_end = (unsigned)va_end - (unsigned)va_tail;
+
+ /*
+ * Note: The following check must be >= instead of just >. We never want
+ * the tail to be equal to the end so we can always copy a descriptor
+ * to the tail, safely.
+ */
+ if ( incr_bytes >= bytes_to_end )
+ {
+ /* Wrap: continue from the start of the fifo. */
+ va_tail = (char *)
+ ( (unsigned)DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo ) +
+ ( incr_bytes - bytes_to_end ) );
+ }
+ else
+ {
+ va_tail = (char *)( (unsigned)va_tail + incr_bytes );
+ }
+
+ DMA_FifoSetTail( &f_ptr->dma_fifo,
+ va_tail );
+
+ /* Each descriptor is 2 quads, so incr/2 descriptors were injected. */
+ f_ptr->desc_count += (incr >> 1);
+ }
+
+}
+
+
+/*!
+ * \brief Get DMA Injection Fifo Descriptor Count
+ *
+ * Return the running count of descriptors that have ever been injected
+ * into this fifo.
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ *
+ * \retval desc_count The descriptor count for the specified fifo
+ */
+__INLINE__ unsigned long long DMA_InjFifoGetDescriptorCount( DMA_InjFifo_t *f_ptr )
+{
+ SPI_assert( f_ptr != NULL );
+ return f_ptr->desc_count;
+}
+
+
+/*!
+ * \brief Is DMA Descriptor Done
+ *
+ * Return whether a specified descriptor is still in the specified injection
+ * fifo (not done). The descriptor is identified by the fifo's descriptor
+ * count immediately after the descriptor was injected into the fifo (see
+ * DMA_InjFifoGetDescriptorCount()).
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ * \param[in] desc_count The descriptor count immediately after the
+ * descriptor in question was injected into
+ * the fifo.
+ * \param[in] update 0 Do not update the fifo's shadow information.
+ * 1 Update the fifo's shadow information.
+ * It is a performance optimization to only update the
+ * shadow information once for a group of descriptors
+ * being processed.
+ *
+ * \retval 0 False. The descriptor identified by desc_count is not done.
+ * It is still in the fifo.
+ * \retval 1 True. The descriptor identified by desc_count is done.
+ * It is no longer in the fifo.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoIsDescriptorDone(
+ DMA_InjFifo_t *f_ptr,
+ unsigned long long desc_count,
+ unsigned int update
+ )
+{
+ unsigned long long num_desc_in_fifo;
+ unsigned int free_space;
+ DMA_Fifo_t *fifo_ptr;
+
+ SPI_assert( f_ptr != NULL );
+
+ fifo_ptr = &(f_ptr->dma_fifo);
+
+ /* If caller wants a fresh look in the fifo, update its free space.
+ * Otherwise, fetch the free space based on shadows.
+ */
+ if (update)
+ free_space = DMA_FifoGetFreeSpace (fifo_ptr, 1, 0);
+ else
+ free_space = DMA_FifoGetFreeSpaceNoUpdateCalculation(fifo_ptr);
+
+ /* Compute the desc_count of the oldest descriptor in the fifo (minus 1)
+ * Note: Each desc is a 32B unit and the below are 16B entities
+ */
+ num_desc_in_fifo = ( DMA_FifoGetSize(fifo_ptr) - free_space ) / 2;
+
+ /* Determine if the specified desc_count is still in the fifo.
+ * We take the current descriptor count for this fifo and subtract the
+ * number of descriptors still in the fifo. This is the descriptor count
+ * of the oldest descriptor still remaining in the fifo (minus 1).
+ * We compare that with the caller's desc_count to determine if the
+ * caller's descriptor is still in the fifo.
+ */
+ if ( desc_count <= (DMA_InjFifoGetDescriptorCount(f_ptr) - num_desc_in_fifo) )
+ return (1); /* Descriptor is done */
+ else
+ return (0); /* Descriptor is not done */
+
+}
+
+
+/*!
+ * \brief DMA Injection Fifo Reserve Descriptor Storage
+ *
+ * Reserve storage in a DMA injection fifo for a remote get descriptor, given
+ * an injection fifo structure.
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ *
+ * \retval 0 Successful. There was enough space in the fifo and the
+ * storage was reserved.
+ * \retval -1 Unsuccessful. There was not enough space in the fifo.
+ *
+ * \note Internally, this increments the occupiedSize of the fifo by
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
+ *
+ */
+__INLINE__ int DMA_InjFifoReserveDescriptorStorage( DMA_InjFifo_t *f_ptr )
+{
+ unsigned int unoccupied;
+
+ SPI_assert( f_ptr != NULL );
+
+ /* Quads not yet logically reserved in this fifo. */
+ unoccupied = DMA_FifoGetSize( &f_ptr->dma_fifo ) - f_ptr->occupiedSize;
+
+ /* Reject unless a descriptor fits while preserving the always-reserved
+ * minimum free space.
+ */
+ if ( unoccupied < DMA_MIN_INJECT_SIZE_IN_QUADS +
+ DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS )
+ return (-1);
+
+ f_ptr->occupiedSize += DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+ return (0);
+}
+
+
+/*!
+ * \brief DMA Injection Fifo Free Descriptor Storage Reservation
+ *
+ * Free a reservation for storage for a remote get descriptor in a DMA injection
+ * fifo, previously reserved using DMA_InjFifoReserveDescriptorStorageById().
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ *
+ * \return None
+ *
+ * \note Internally, this decrements the occupiedSize of the fifo by
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
+ *
+ */
+__INLINE__ void DMA_InjFifoFreeDescriptorStorageReservation( DMA_InjFifo_t *f_ptr )
+{
+ SPI_assert( f_ptr != NULL );
+ /* Must not release more than was reserved. */
+ SPI_assert( f_ptr->occupiedSize >= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS );
+
+ f_ptr->occupiedSize -= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+}
+
+
+/*!
+ * \brief Check If An Injection Fifo Has Space For Injection
+ *
+ * Check if an injection fifo has enough space for a single descriptor to be
+ * injected, leaving at least DMA_MIN_INJECT_SIZE_IN_QUADS of free space
+ * after the injection.
+ *
+ * The shadow head/tail are consulted first (cheap); only if they indicate
+ * "full" is the hardware head re-read and the free space recalculated.
+ * The hardware tail never needs re-reading because only software moves it,
+ * and the free space is recalculated at that time.
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ *
+ * \retval hasSpace An indicator of whether the fifo has space for a
+ * descriptor.
+ * - 0 (false) means the fifo is full.
+ * - 1 (true) means the fifo has space.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoHasSpace( DMA_InjFifo_t *f_ptr )
+{
+ /* Quads that must remain free after injecting one descriptor. */
+ const unsigned int quads_needed = DMA_MIN_INJECT_SIZE_IN_QUADS +
+ DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+ unsigned int quads_free;
+
+ SPI_assert( f_ptr != NULL );
+
+ /* Cheap check first: free space based on the shadow head and tail. */
+ quads_free = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+ 0, /* Use shadow head */
+ 0);/* Use shadow tail */
+ if ( quads_free >= quads_needed )
+ return 1; /* There is space in the fifo. */
+
+ /* Shadow says full: re-read the hardware head and recheck. */
+ quads_free = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+ 1, /* Use hardware head */
+ 0);/* Use shadow tail */
+
+ return ( quads_free >= quads_needed ) ? 1 : 0;
+}
+
+/* A 32-byte copy (one injection descriptor) using integer registers.
+ * The loads and stores are interleaved by hand so each load can issue
+ * ahead of the store that consumes it. The two regions must not overlap.
+ */
+static inline void DMA_DescriptorToFifo(char *store_ptr, const char *load_ptr)
+{
+ int *dst = (int *) store_ptr ;
+ const int *src = (const int *) load_ptr ;
+ int r0 = src[0] ;
+ int r1 = src[1] ;
+ dst[0] = r0 ; r0 = src[2] ;
+ dst[1] = r1 ; r1 = src[3] ;
+ dst[2] = r0 ; r0 = src[4] ;
+ dst[3] = r1 ; r1 = src[5] ;
+ dst[4] = r0 ; r0 = src[6] ;
+ dst[5] = r1 ; r1 = src[7] ;
+ dst[6] = r0 ;
+ dst[7] = r1 ;
+}
+/*!
+ * \brief Inject a Descriptor into a DMA Injection Fifo Without Checking for
+ * Space
+ *
+ * Inject a descriptor into a DMA injection fifo, given an injection fifo
+ * structure, without checking to see if there is enough space in the fifo.
+ * It is assumed that the caller has already checked for enough space using
+ * the DMA_InjFifoHasSpace() function.
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ * \param[in] desc A pointer to the descriptor to be injected.
+ * Must be 16-byte aligned.
+ *
+ * \retval numDescInjected The number of descriptors injected.
+ * - 1 means it was successfully injected.
+ *
+ * \see DMA_InjFifoHasSpace()
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptorNoSpaceCheck(
+ DMA_InjFifo_t *f_ptr,
+ DMA_InjDescriptor_t *desc
+ )
+{
+ SPI_assert( f_ptr != NULL );
+ SPI_assert( desc != NULL );
+
+ {
+ char *load_ptr, *store_ptr; /* only used on the FP copy path */
+
+ /*
+ * Copy the descriptor to the current va_tail of the fifo.
+ * Msync to ensure the descriptor has been written to memory and the L1 caches
+ * are in sync.
+ * Move the tail past the descriptor so the DMA knows the descriptor is there.
+ * - handle wrapping
+ * - update free space
+ *
+ */
+
+ if( k_use_fp_to_inject) /* compile-time constant; dead branch is elided */
+ {
+ /* Masking off the low 4 address bits tests 16-byte alignment. */
+ if ( ( (unsigned)desc & 0xFFFFFFF0 ) == (unsigned)desc ) /* 16B aligned? */
+ {
+ load_ptr = (char*)desc;
+ store_ptr = (char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo );
+ _bgp_QuadLoad ( load_ptr, 0 );
+ _bgp_QuadLoad ( load_ptr+16, 1 );
+ _bgp_QuadStore( store_ptr, 0 );
+ _bgp_QuadStore( store_ptr+16, 1 );
+ }
+ else
+ {
+ memcpy( DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),
+ desc,
+ DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES );
+ }
+ }
+ else
+ {
+ /* Integer-register copy of the 32-byte descriptor. */
+ DMA_DescriptorToFifo((char*)DMA_FifoGetTailFromShadow( &f_ptr->dma_fifo ),(char*)desc) ;
+ }
+
+ /* _bgp_msync(); mbar is good enough */
+ _bgp_mbar();
+
+ DMA_InjFifoIncrementTail( f_ptr,
+ DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS );
+
+ return 1; /* Success */
+ }
+}
+
+
+/*!
+ * \brief Inject a Descriptor into a DMA Injection Fifo
+ *
+ * Inject a descriptor into a DMA injection fifo, given an injection fifo
+ * structure.
+ *
+ * This is exactly a space check followed by an unchecked injection, so it
+ * is implemented by composing DMA_InjFifoHasSpace() and
+ * DMA_InjFifoInjectDescriptorNoSpaceCheck(); previously both bodies were
+ * duplicated here verbatim, which invited drift between the copies.
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ * \param[in] desc A pointer to the descriptor to be injected.
+ * Must be 16-byte aligned.
+ *
+ * \retval numDescInjected The number of descriptors injected.
+ * - 0 means it was not injected, most likely because
+ * the fifo is full.
+ * - 1 means it was successfully injected
+ *
+ * Caution: If you call this function two or more times in quick
+ * succession to try to put descriptors into a FIFO, occasionally
+ * one of the descriptors appears not to be acted on by the hardware.
+ * An alternative is to use DMA_InjFifoInjectDescriptors with a vector
+ * of descriptors; this appears to do the job reliably.
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptor(
+ DMA_InjFifo_t *f_ptr,
+ DMA_InjDescriptor_t *desc
+ )
+{
+ SPI_assert( f_ptr != NULL );
+ SPI_assert( desc != NULL );
+
+ /* Check free space: shadow values first, then a hardware head re-read
+ * (see DMA_InjFifoHasSpace() for the details). Fail if still full.
+ */
+ if ( DMA_InjFifoHasSpace( f_ptr ) == 0 )
+ {
+ return 0; /* Fifo is full; descriptor not injected. */
+ }
+
+ /* Copy the descriptor to the tail, mbar, and advance the tail. */
+ return DMA_InjFifoInjectDescriptorNoSpaceCheck( f_ptr, desc );
+}
+
+
+/*!
+ * \brief Inject Multiple Descriptors into a DMA Injection Fifo
+ *
+ * Inject multiple descriptors into a DMA injection fifo, given an injection fifo
+ * structure
+ *
+ * \param[in] f_ptr Pointer to the injection fifo structure
+ * \param[in] num_desc Number of descriptors to be injected
+ * \param[in] desc A pointer to an array of pointers to descriptors to be
+ * injected. The descriptors must be 16-byte aligned.
+ *
+ * \retval numDescInjected The number of descriptors injected.
+ * - less than num_desc means some were not injected,
+ * most likely because the fifo is full.
+ * - num_desc means all were successfully injected
+ *
+ */
+#if 0
+__INLINE__ int  DMA_InjFifoInjectDescriptors(
+                                   DMA_InjFifo_t        *f_ptr,
+                                   int                   num_desc,
+                                   DMA_InjDescriptor_t **desc
+                                   )
+{
+  /* Reference implementation: inject one descriptor at a time and count
+   * how many were accepted. */
+  int i;
+  int rc = 0;
+  for ( i = 0; i < num_desc; i++ )
+    {
+      rc += DMA_InjFifoInjectDescriptor( f_ptr, desc[i] );
+    }
+  return rc;
+}
+#else
+__INLINE__ int  DMA_InjFifoInjectDescriptors(
+                                   DMA_InjFifo_t        *f_ptr,
+                                   int                   num_desc,
+                                   DMA_InjDescriptor_t **desc
+                                   )
+{
+  unsigned int  free_space;
+  unsigned int  total_space_needed_in_quads;
+  void         *va_tail;
+  void         *va_end;
+  void         *va_start;
+  char         *target;
+  unsigned int  num_quads_to_inject, num_quads_remaining;
+  int           i;
+  char         *load_ptr, *store_ptr;
+
+  SPI_assert( f_ptr != NULL );
+  SPI_assert( desc  != NULL );
+  SPI_assert( num_desc > 0 );
+
+  /* Get the free space in the fifo using the shadow head and tail. */
+  free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+                                     0,  /* Use shadow head */
+                                     0); /* Use shadow tail */
+
+  total_space_needed_in_quads = num_desc *
+                                DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+
+  /*
+   * If after injecting all descriptors (DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS
+   * per descriptor), there is still at least the minimum allowable free
+   * space left in the fifo, go ahead and inject them all.
+   *
+   * Otherwise, read the hardware head pointer, recalculate the free space,
+   * and check again.
+   *
+   * If there is still not enough room for even one descriptor (plus the
+   * minimum free space), return 0.  Otherwise, continue and inject as many
+   * descriptors as possible.
+   */
+  if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
+                    total_space_needed_in_quads )
+  {
+    free_space = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+                                       1,  /* Use hardware head */
+                                       0); /* Use shadow tail   */
+
+    if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS +
+                      DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS ) return 0;
+  }
+
+  /*
+   * We have enough room in the fifo for at least one descriptor.
+   * Copy as many descriptors as fit to the current va_tail of the fifo,
+   * wrapping at va_end, then mbar so the stores are visible before the
+   * tail is moved (moving the tail tells the DMA the descriptors are there).
+   */
+  va_tail  = DMA_FifoGetTailFromShadow ( &f_ptr->dma_fifo );
+  va_start = DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo );
+  va_end   = DMA_FifoGetEndFromShadow  ( &f_ptr->dma_fifo );
+  target   = (char*)va_tail;
+
+  if ( free_space < DMA_MIN_INJECT_SIZE_IN_QUADS + total_space_needed_in_quads ) {
+    /*
+     * Partial injection.  Round the usable space down to a whole number of
+     * descriptors.  Without this rounding, the unsigned countdown loop below
+     * (which decrements by DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) could wrap
+     * past zero, and the tail could be advanced by a descriptor fragment.
+     * The check above guarantees at least one descriptor's worth remains
+     * after rounding.
+     */
+    num_quads_to_inject  = free_space - DMA_MIN_INJECT_SIZE_IN_QUADS;
+    num_quads_to_inject -= num_quads_to_inject %
+                           DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+  }
+  else {
+    num_quads_to_inject = total_space_needed_in_quads;
+  }
+  num_quads_remaining = num_quads_to_inject;
+  i = 0;
+
+  while ( num_quads_remaining > 0 )
+  {
+    SPI_assert( desc[i] != NULL );
+
+    if( k_use_fp_to_inject)
+    {
+      if ( ( (unsigned)desc[i] & 0xFFFFFFF0 ) == (unsigned)desc[i] ) /* 16B aligned? */
+      {
+        /* Copy the 32B descriptor with two quad (16B) loads/stores. */
+        load_ptr  = (char*)desc[i];
+        store_ptr = target;
+        _bgp_QuadLoad ( load_ptr,     0 );
+        _bgp_QuadLoad ( load_ptr+16,  1 );
+        _bgp_QuadStore( store_ptr,    0 );
+        _bgp_QuadStore( store_ptr+16, 1 );
+      }
+      else
+      {
+        memcpy( target, desc[i], DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES );
+      }
+    }
+    else
+    {
+      DMA_DescriptorToFifo( target, (char*)(desc[i]) );
+    }
+
+    i++;
+    num_quads_remaining -= DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS;
+    target += DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES;
+    if ( target >= (char*)va_end )   /* Wrap to the start of the fifo */
+      target = (char*)va_start;
+  }
+
+  /* _bgp_msync(); mbar good enough */
+  _bgp_mbar();
+
+  DMA_InjFifoIncrementTail( f_ptr,
+                            num_quads_to_inject );
+
+  return i; /* Number of descriptors injected */
+}
+#endif
+
+/*!
+ * \brief Get DMA Injection Fifo Group Number
+ *
+ * Get the DMA Injection Fifo Group number, given an injection fifo group
+ * structure.
+ *
+ * \param[in] fg_ptr Pointer to the structure previously filled in when the
+ * group was allocated.
+ *
+ * \return The DMA Injection Fifo Group number
+ *
+ */
+__INLINE__ int DMA_InjFifoGetGroupNum( const DMA_InjFifoGroup_t *fg_ptr )
+{
+  SPI_assert( fg_ptr != NULL );
+
+  /* The group id was recorded in the structure when the group was allocated. */
+  return fg_ptr->group_id;
+}
+
+
+/*!
+ * \brief Get the "Not Empty" Status of an Injection Fifo Group
+ *
+ * Get the "Not Empty" status of the fifos that the specified fifo group has
+ * permission to use.
+ *
+ * \param[in] fg_ptr Pointer to the injection fifo group structure
+ *
+ * \retval notEmptyStatus A 32-bit value, one bit per fifo.
+ * Bit i is 1 if the specified fifo group has
+ * permission to use fifo i and fifo i is not
+ * empty.
+ * Bit i is 0 if the specified fifo group either
+ * does not have permission to use fifo i, or fifo i
+ * is empty.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetNotEmpty( DMA_InjFifoGroup_t *fg_ptr )
+{
+  unsigned not_empty_bits;
+
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  /* Mask the group-wide status with this group's fifo-use permissions. */
+  not_empty_bits = fg_ptr->status_ptr->not_empty;
+  return not_empty_bits & fg_ptr->permissions;
+}
+
+
+/*!
+ * \brief Get the "available" Status of an Injection Fifo Group
+ *
+ * Get the "available" status of the fifos that the specified fifo group has
+ * permission to use. "available" means that the fifo is enabled and
+ * activated.
+ *
+ * \param[in] fg_ptr Pointer to the injection fifo group structure
+ *
+ * \retval availableStatus A 32-bit value, one bit per fifo.
+ * Bit i is 1 if the specified fifo group has
+ * permission to use fifo i and fifo i is available
+ * Bit i is 0 if the specified fifo group either
+ * does not have permission to use fifo i, or fifo i
+ * is not available.
+ *
+ * \note Normally, there should be a 1 in every position except those that
+ * the specified fifo group does not have permission to use.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetAvailable( DMA_InjFifoGroup_t *fg_ptr )
+{
+  unsigned available_bits;
+
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  /* Report availability only for fifos this group is permitted to use. */
+  available_bits = fg_ptr->status_ptr->available;
+  return available_bits & fg_ptr->permissions;
+}
+
+
+/*!
+ * \brief Get the "threshold crossed" Status of an Injection Fifo Group
+ *
+ * Get the "threshold crossed" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in] fg_ptr Pointer to the injection fifo group structure
+ *
+ * \retval thresholdCrossedStatus A 32-bit value, one bit per fifo.
+ * Bit i is 1 if the specified fifo group has
+ * permission to use fifo i and fifo i has crossed
+ * a threshold.
+ * Bit i is 0 if the specified fifo group either
+ * does not have permission to use fifo i, or fifo i
+ * has not crossed a threshold.
+ *
+ * \note Normally, there should be a 0 in every position.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetThresholdCrossed( DMA_InjFifoGroup_t *fg_ptr )
+{
+  unsigned crossed_bits;
+
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  /* Restrict the threshold-crossed status word to permitted fifos. */
+  crossed_bits = fg_ptr->status_ptr->threshold_crossed;
+  return crossed_bits & fg_ptr->permissions;
+}
+
+
+/*!
+ * \brief Set the "clear threshold crossed" Status of an Injection Fifo Group
+ *
+ * Set the "clear threshold crossed" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in] fg_ptr Pointer to the injection fifo group structure
+ * \param[in] clr A 32-bit value, one bit per fifo.
+ * Only bits that the specified fifo group has
+ * permission to use should be set to 1.
+ * Set bit i to 1 to clear the threshold crossed status
+ * of fifo i.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetClearThresholdCrossed( DMA_InjFifoGroup_t *fg_ptr,
+                                                     unsigned int        clr )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (clr & fg_ptr->permissions) == clr ); /* only permitted fifos */
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  /* Each 1 bit clears the threshold-crossed status of that fifo. */
+  fg_ptr->status_ptr->clear_threshold_crossed = clr;
+}
+
+
+/*!
+ * \brief Get the "activated" Status of an Injection Fifo Group
+ *
+ * Get the "activated" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in] fg_ptr Pointer to the injection fifo group structure
+ *
+ * \retval activatedStatus A 32-bit value, one bit per fifo.
+ * Bit i is 1 if the specified fifo group has
+ * permission to use fifo i and fifo i is activated
+ * Bit i is 0 if the specified fifo group either
+ * does not have permission to use fifo i, or fifo i
+ * is not activated.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetActivated( DMA_InjFifoGroup_t *fg_ptr )
+{
+  unsigned activated_bits;
+
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  /* Only fifos this group may use are reported as activated. */
+  activated_bits = fg_ptr->status_ptr->activated;
+  return activated_bits & fg_ptr->permissions;
+}
+
+
+/*!
+ * \brief Set the "activate" Status of an Injection Fifo Group
+ *
+ * Set the "activate" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in] fg_ptr Pointer to the injection fifo group structure
+ * \param[in] act A 32-bit value, one bit per fifo.
+ * Only bits that the specified fifo group has
+ * permission to use should be set to 1.
+ * Set bit i to 1 to activate fifo i.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetActivate( DMA_InjFifoGroup_t *fg_ptr,
+                                        unsigned int        act )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (act & fg_ptr->permissions) == act ); /* only permitted fifos */
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  /* Each 1 bit activates that fifo. */
+  fg_ptr->status_ptr->activate = act;
+}
+
+
+/*!
+ * \brief Set the "deactivate" Status of an Injection Fifo Group
+ *
+ * Set the "deactivate" status of the fifos that the specified fifo
+ * group has permission to use.
+ *
+ * \param[in] fg_ptr Pointer to the injection fifo group structure
+ * \param[in] deact A 32-bit value, one bit per fifo.
+ * Only bits that the specified fifo group has
+ * permission to use should be set to 1.
+ * Set bit i to 1 to deactivate fifo i.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetDeactivate( DMA_InjFifoGroup_t *fg_ptr,
+                                          unsigned int        deact )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (deact & fg_ptr->permissions) == deact ); /* only permitted fifos */
+  SPI_assert( fg_ptr->status_ptr != NULL );
+
+  /* Each 1 bit deactivates that fifo. */
+  fg_ptr->status_ptr->deactivate = deact;
+}
+
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Calls to access the Fifo, given a fifo group and a fifo ID
+ * -----------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief DMA InjFifo Initialization By Id
+ *
+ * - For an allocated injection DMA fifo, initialize its start, head, tail, and
+ * end.
+ * - Compute fifo size and free space.
+ * - Initialize descriptor count.
+ * - Activate the fifo.
+ *
+ * \param[in] fg_ptr Pointer to fifo group structure.
+ * \param[in] fifo_id Id of the fifo to be initialized
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] va_start Virtual address of the start of the fifo.
+ * \param[in] va_head Virtual address of the head of the fifo (typically
+ * equal to va_start).
+ * \param[in] va_end Virtual address of the end of the fifo.
+ *
+ * \retval 0 Successful.
+ * \retval -1 Unsuccessful. Error checks (asserted below) include
+ * - va_start < va_end
+ * - va_start <= va_head <=
+ * (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES)
+ * - va_start and va_end are 32-byte aligned
+ * - fifo size is at least (DMA_MIN_INJECT_SIZE_IN_QUADS +
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) quads (16 bytes per quad)
+ *
+ */
+__INLINE__ int DMA_InjFifoInitById(
+ DMA_InjFifoGroup_t *fg_ptr,
+ int fifo_id,
+ void *va_start,
+ void *va_head,
+ void *va_end
+ )
+{
+ int rc;
+
+ SPI_assert( fg_ptr != NULL );
+ SPI_assert( fifo_id >= 0 && fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+ SPI_assert( (fg_ptr->permissions & _BN(fifo_id)) != 0 );
+ SPI_assert( va_start < va_end );
+ SPI_assert( va_start <= va_head );
+ /* Head must leave room for at least one descriptor before the end. */
+ SPI_assert( ((uint32_t) va_head) <= ( ((uint32_t) va_end) - DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES) );
+ /* Start and end must be 32-byte aligned. */
+ SPI_assert( ( ( (uint32_t) va_start) & 0xFFFFFFE0) == (uint32_t) va_start );
+ SPI_assert( ( ( (uint32_t) va_end ) & 0xFFFFFFE0) == (uint32_t) va_end );
+ /* Fifo must hold the minimum free space plus one descriptor (16B quads). */
+ SPI_assert( ( (unsigned)va_end - (unsigned)va_start ) >=
+ ( (DMA_MIN_INJECT_SIZE_IN_QUADS + DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS) * 16 ) );
+
+ /*
+ * Initialize the fifo by invoking a system call. This system call
+ * deactivates the fifo, initializes the start, end, head, and tail,
+ * and activates the fifo.
+ */
+
+ rc = Kernel_InjFifoInitById(
+ (uint32_t*)fg_ptr,
+ fifo_id,
+ (uint32_t*)va_start,
+ (uint32_t*)va_head,
+ (uint32_t*) va_end
+ );
+
+ if (rc) return rc;
+
+ /* Freshly initialized fifo: no descriptors have been injected yet. */
+ fg_ptr->fifos[fifo_id].desc_count = 0;
+
+ return 0;
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Start Pointer from the Shadow Using a Fifo Id
+ *
+ * Get a DMA injection fifo's start pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval va_start The virtual address of the start of the fifo
+ *
+ */
+__INLINE__ void * DMA_InjFifoGetStartFromShadowById( DMA_InjFifoGroup_t *fg_ptr,
+                                                     int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Delegate to the fifo-level shadow accessor for this group member. */
+  return DMA_FifoGetStartFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Head Pointer Using a Fifo Id
+ *
+ * Get a DMA injection fifo's head pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval va_head The virtual address of the head of the fifo.
+ *
+ */
+__INLINE__ void * DMA_InjFifoGetHeadById( DMA_InjFifoGroup_t *fg_ptr,
+                                          int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Head comes from the fifo-level accessor (reads the hardware head). */
+  return DMA_FifoGetHead( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Tail Pointer Using a Fifo Id
+ *
+ * Get a DMA injection fifo's tail pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval va_tail The virtual address of the tail of the fifo
+ *
+ */
+__INLINE__ void *DMA_InjFifoGetTailById( DMA_InjFifoGroup_t *fg_ptr,
+                                         int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Tail comes from the fifo-level accessor. */
+  return DMA_FifoGetTail( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo End Pointer from the Shadow Using a Fifo Id
+ *
+ * Get a DMA injection fifo's end pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval va_end The virtual address of the end of the fifo
+ *
+ */
+__INLINE__ void *DMA_InjFifoGetEndById( DMA_InjFifoGroup_t *fg_ptr,
+                                        int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* End pointer is served from the shadow copy. */
+  return DMA_FifoGetEndFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Size Using a Fifo Id
+ *
+ * Get a DMA injection fifo's size, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval size The size of the DMA fifo, in units of 16B quads.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoGetSizeById( DMA_InjFifoGroup_t *fg_ptr,
+                                                int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Size is returned in units of 16-byte quads. */
+  return DMA_FifoGetSize( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Free Space Using a Fifo Id
+ *
+ * Get a DMA injection fifo's free space, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] read_head Indicates whether to read the head from the hardware
+ * fifo before calculating the free space.
+ * - 1 means to read the hardware head
+ * - 0 means to use the current head shadow
+ * \param[in] read_tail Indicates whether to read the tail from the hardware
+ * fifo before calculating the free space.
+ * - 1 means to read the hardware tail
+ * - 0 means to use the current tail shadow
+ *
+ * \retval freeSpace The amount of free space in the fifo, in units of
+ * 16B quads.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoGetFreeSpaceById( DMA_InjFifoGroup_t *fg_ptr,
+                                                     int                 fifo_id,
+                                                     unsigned int        read_head,
+                                                     unsigned int        read_tail )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* read_head/read_tail select the hardware (1) or shadow (0) pointers. */
+  return DMA_FifoGetFreeSpace( &fg_ptr->fifos[fifo_id].dma_fifo,
+                               read_head,
+                               read_tail );
+}
+
+
+/*!
+ * \brief Set DMA InjFifo Head Pointer Using a Fifo Id
+ *
+ * Set a DMA injection fifo's head pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] va_head The virtual address of the head of the fifo.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetHeadById( DMA_InjFifoGroup_t *fg_ptr,
+                                        int                 fifo_id,
+                                        void               *va_head )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Injection-fifo-level setter (not the raw DMA_Fifo setter). */
+  DMA_InjFifoSetHead( &fg_ptr->fifos[fifo_id], va_head );
+}
+
+
+/*!
+ * \brief Set DMA InjFifo Tail Pointer Using a Fifo Id
+ *
+ * Set a DMA injection fifo's tail pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] va_tail The virtual address of the tail of the fifo.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetTailById( DMA_InjFifoGroup_t *fg_ptr,
+                                        int                 fifo_id,
+                                        void               *va_tail )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Goes straight to the fifo-level tail setter. */
+  DMA_FifoSetTail( &fg_ptr->fifos[fifo_id].dma_fifo, va_tail );
+}
+
+
+/*!
+ * \brief Increment DMA InjFifo Tail Pointer Using a Fifo Id
+ *
+ * Increment a DMA injection fifo's tail pointer, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] incr The number of quads (16 byte units) to increment the
+ * tail pointer by.
+ *
+ * \return None
+ *
+ * \note This function does not check if there is free space in the fifo
+ * for this many quads. It must be preceeded by a check of the
+ * free space.
+*/
+__INLINE__ void DMA_InjFifoIncrementTailById( DMA_InjFifoGroup_t *fg_ptr,
+                                              int                 fifo_id,
+                                              unsigned int        incr )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* No free-space check here; the caller must have verified space. */
+  DMA_InjFifoIncrementTail( &fg_ptr->fifos[fifo_id], incr );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Descriptor Count Using a Fifo Id
+ *
+ * Get a DMA injection fifo's descriptor count, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return The fifo's cumulative descriptor count.
+ *
+ */
+__INLINE__ unsigned long long DMA_InjFifoGetDescriptorCountById( DMA_InjFifoGroup_t *fg_ptr,
+                                                                 int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Delegate to the injection-fifo-level counter accessor. */
+  return DMA_InjFifoGetDescriptorCount( &fg_ptr->fifos[fifo_id] );
+}
+
+
+/*!
+ * \brief Is DMA Descriptor Done Using a Fifo Id
+ *
+ * Return whether a specified descriptor is still in the specified injection
+ * fifo (not done). The descriptor is identified by the descriptor count
+ * immediately after the descriptor was injected into the fifo (returned by
+ * DMA_InjFifoIncrementTail().
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] desc_count The descriptor count immediately after the
+ * descriptor in question was injected into
+ * the fifo.
+ * \param[in] update 0 Do not update the fifo's shadow information.
+ * 1 Update the fifo's shadow information.
+ * It is a performance optimization to only update the
+ * shadow information once for a group of descriptors
+ * being processed.
+ *
+ * \retval 0 False. The descriptor identified by desc_count is not done.
+ * It is still in the fifo.
+ * \retval 1 True. The descriptor identified by desc_count is done.
+ * It is no longer in the fifo.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoIsDescriptorDoneById( DMA_InjFifoGroup_t *fg_ptr,
+                                                         int                 fifo_id,
+                                                         unsigned long long  desc_count,
+                                                         unsigned int        update )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* desc_count identifies the descriptor; update refreshes the shadow. */
+  return DMA_InjFifoIsDescriptorDone( &fg_ptr->fifos[fifo_id],
+                                      desc_count,
+                                      update );
+}
+
+
+/*!
+ * \brief DMA Injection Fifo Reserve Descriptor Storage Using a Fifo Id
+ *
+ * Reserve storage in a DMA injection fifo for a remote get descriptor, given
+ * a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval 0 Successful. There was enough space in the fifo and the
+ * storage was reserved.
+ * \retval -1 Unsuccessful. There was not enough space in the fifo.
+ *
+ * \note Internally, this increments the occupiedSize of the fifo by
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
+ *
+ */
+__INLINE__ int DMA_InjFifoReserveDescriptorStorageById( DMA_InjFifoGroup_t *fg_ptr,
+                                                        int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Returns 0 on success, -1 when the fifo lacks space (see callee). */
+  return DMA_InjFifoReserveDescriptorStorage( &fg_ptr->fifos[fifo_id] );
+}
+
+
+/*!
+ * \brief DMA Injection Fifo Free Descriptor Storage Reservation Using a Fifo Id
+ *
+ * Free a reservation for storage for a remote get descriptor in a DMA injection
+ * fifo, previously reserved using DMA_InjFifoReserveDescriptorStorageById().
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return None
+ *
+ * \note Internally, this decrements the occupiedSize of the fifo by
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS.
+ *
+ */
+__INLINE__ void DMA_InjFifoFreeDescriptorStorageReservationById( DMA_InjFifoGroup_t *fg_ptr,
+                                                                 int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Release a reservation made by DMA_InjFifoReserveDescriptorStorageById(). */
+  DMA_InjFifoFreeDescriptorStorageReservation( &fg_ptr->fifos[fifo_id] );
+}
+
+
+/*!
+ * \brief Check If An Injection Fifo Has Space For Injection Using a Fifo Id
+ *
+ * Check if an injection fifo has enough space for a single descriptor to be
+ * injected, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval hasSpace An indicator of whether the fifo has space for a
+ * descriptor.
+ * - 0 (false) means the fifo is full.
+ * - 1 (true) means the fifo has space.
+ *
+ */
+__INLINE__ unsigned int DMA_InjFifoHasSpaceById( DMA_InjFifoGroup_t *fg_ptr,
+                                                 int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* 1 if one more descriptor fits, 0 if the fifo is full. */
+  return DMA_InjFifoHasSpace( &fg_ptr->fifos[fifo_id] );
+}
+
+
+/*!
+ * \brief Inject a Descriptor into a DMA Injection Fifo Without Checking for
+ * Space Using a Fifo Id
+ *
+ * Inject a descriptor into a DMA injection fifo, given a fifo group and
+ * fifo id, without checking to see if there is enough space in the fifo.
+ * It is assumed that the caller has already checked for enough space using
+ * the DMA_InjFifoHasSpaceById() function.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] desc A pointer to the descriptor to be injected.
+ * Must be 16-byte aligned.
+ *
+ * \retval numDescInjected The number of descriptors injected.
+ * - 1 means it was successfully injected.
+ *
+ * \see DMA_InjFifoHasSpaceById()
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptorNoSpaceCheckById( DMA_InjFifoGroup_t  *fg_ptr,
+                                                            int                  fifo_id,
+                                                            DMA_InjDescriptor_t *desc )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Caller must have checked space with DMA_InjFifoHasSpaceById(). */
+  return DMA_InjFifoInjectDescriptorNoSpaceCheck( &fg_ptr->fifos[fifo_id],
+                                                  desc );
+}
+
+
+/*!
+ * \brief Inject Descriptor into a DMA InjFifo Using a Fifo Id
+ *
+ * Inject a descriptor into a DMA injection fifo, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] desc Pointer to the descriptor to be injected into the fifo.
+ *
+ * \retval numDescInjected The number of descriptors injected.
+ * - 0 means it was not injected, most likely because
+ * the fifo is full.
+ * - 1 means it was successfully injected
+ *
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptorById( DMA_InjFifoGroup_t  *fg_ptr,
+                                                int                  fifo_id,
+                                                DMA_InjDescriptor_t *desc )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Returns the number injected: 0 (fifo full) or 1 (success). */
+  return DMA_InjFifoInjectDescriptor( &fg_ptr->fifos[fifo_id],
+                                      desc );
+}
+
+
+/*!
+ * \brief Inject Multiple Descriptors into a DMA InjFifo Using a Fifo Id
+ *
+ * Inject multiple descriptors into a DMA injection fifo, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] num_desc Number of descriptors to be injected
+ * \param[in] desc Pointer to an array of pointers to the descriptors to
+ * be injected into the fifo.
+ *
+ * \retval numDescInjected The number of descriptors injected.
+ * - less than num_desc means that some were not
+ * injected, most likely because the fifo is full.
+ * - equal to num_desc means that all were
+ * successfully injected.
+ *
+ */
+__INLINE__ int DMA_InjFifoInjectDescriptorsById( DMA_InjFifoGroup_t   *fg_ptr,
+                                                 int                   fifo_id,
+                                                 int                   num_desc,
+                                                 DMA_InjDescriptor_t **desc )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Returns the number actually injected (may be less than num_desc). */
+  return DMA_InjFifoInjectDescriptors( &fg_ptr->fifos[fifo_id],
+                                       num_desc,
+                                       desc );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Not Empty Status Using a Fifo Id
+ *
+ * Get a DMA injection fifo's not empty status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return 32-bit status. status bit "fifo_id" is 1 if the
+ * fifo is not empty, 0 if empty. That is, the return value
+ * is 0 if empty, non-zero if not empty.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetNotEmptyById( DMA_InjFifoGroup_t *fg_ptr,
+                                                int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Isolate this fifo's bit from the group-wide not-empty status word. */
+  return DMA_InjFifoGetNotEmpty( fg_ptr ) & _BN(fifo_id);
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Available Status Using a Fifo Id
+ *
+ * Get a DMA injection fifo's available status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return 32-bit status. status bit fifo_id is 1 if the
+ * fifo is available, 0 if not available.
+ *
+ * \note "available" status means the fifo is enabled and activated.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetAvailableById( DMA_InjFifoGroup_t *fg_ptr,
+                                                 int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Isolate this fifo's bit from the group-wide available status word. */
+  return DMA_InjFifoGetAvailable( fg_ptr ) & _BN(fifo_id);
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Threshold Crossed Status Using a Fifo Id
+ *
+ * Get a DMA injection fifo's threshold crossed status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return 32-bit status. status bit fifo_id is 1 if the
+ * fifo's threshold has been crossed, 0 if not.
+ *
+ * \note This will always be zero.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetThresholdCrossedById( DMA_InjFifoGroup_t *fg_ptr,
+                                                        int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Isolate this fifo's bit from the threshold-crossed status word. */
+  return DMA_InjFifoGetThresholdCrossed( fg_ptr ) & _BN(fifo_id);
+}
+
+
+/*!
+ * \brief Clear DMA InjFifo Threshold Crossed Status Using a Fifo Id
+ *
+ * Clear a DMA injection fifo's threshold crossed status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetClearThresholdCrossedById( DMA_InjFifoGroup_t *fg_ptr,
+                                                         int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Clear only this fifo's threshold-crossed bit. */
+  DMA_InjFifoSetClearThresholdCrossed( fg_ptr, _BN(fifo_id) );
+}
+
+
+/*!
+ * \brief Get DMA InjFifo Activated Status Using a Fifo Id
+ *
+ * Get a DMA injection fifo's activated status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return 32-bit status. status bit fifo_id is 1 if the
+ * fifo is activated, 0 if not activated.
+ *
+ */
+__INLINE__ unsigned DMA_InjFifoGetActivatedById( DMA_InjFifoGroup_t *fg_ptr,
+                                                 int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Isolate this fifo's bit from the group-wide activated status word. */
+  return DMA_InjFifoGetActivated( fg_ptr ) & _BN(fifo_id);
+}
+
+
+/*!
+ * \brief Activate DMA InjFifo Using a Fifo Id
+ *
+ * Activate a DMA injection fifo, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetActivateById( DMA_InjFifoGroup_t *fg_ptr,
+                                            int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Activate only this fifo. */
+  DMA_InjFifoSetActivate( fg_ptr, _BN(fifo_id) );
+}
+
+
+/*!
+ * \brief Deactivate DMA InjFifo Using a Fifo Id
+ *
+ * Deactivate a DMA injection fifo, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_InjFifoSetDeactivateById( DMA_InjFifoGroup_t *fg_ptr,
+                                              int                 fifo_id )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fifo_id >= 0 );
+  SPI_assert( fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+  SPI_assert( ( fg_ptr->permissions & _BN(fifo_id) ) != 0 );
+
+  /* Deactivate only this fifo. */
+  DMA_InjFifoSetDeactivate( fg_ptr, _BN(fifo_id) );
+}
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_Packet.h b/arch/powerpc/include/spi/DMA_Packet.h
new file mode 100644
index 00000000000000..2fea7c98f6c437
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_Packet.h
@@ -0,0 +1,347 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+#ifndef _DMA_PACKET_H_ /* Prevent multiple inclusion */
+#define _DMA_PACKET_H_
+
+
+/*!
+ * \file spi/DMA_Packet.h
+ *
+ * \brief DMA SPI Packet Definitions
+ *
+ */
+
+
+#include <common/namespace.h>
+
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the X plus direction.
+ */
+#define DMA_PACKET_HINT_XP (0x20)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the X minus direction.
+ */
+#define DMA_PACKET_HINT_XM (0x10)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the Y plus direction.
+ */
+#define DMA_PACKET_HINT_YP (0x08)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the Y minus direction.
+ */
+#define DMA_PACKET_HINT_YM (0x04)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the Z plus direction.
+ */
+#define DMA_PACKET_HINT_ZP (0x02)
+
+
+/*!
+ * \brief Hint Bit: Packet wants to travel in the Z minus direction.
+ */
+#define DMA_PACKET_HINT_ZM (0x01)
+
+
+/*!
+ * \brief Virtual Channel Bits: Dynamic 0.
+ */
+#define DMA_PACKET_VC_D0 (0)
+
+
+/*!
+ * \brief Virtual Channel Bits: Dynamic 1.
+ */
+#define DMA_PACKET_VC_D1 (1)
+
+
+/*!
+ * \brief Virtual Channel Bits: Deterministic Bubble Normal.
+ */
+#define DMA_PACKET_VC_BN (2)
+
+
+/*!
+ * \brief Virtual Channel Bits: Deterministic Bubble Priority.
+ */
+#define DMA_PACKET_VC_BP (3)
+
+
+/*!
+ * \brief Dynamic Routing Bit: Follows deterministic Routing.
+ */
+#define DMA_PACKET_DETERMINSTIC (0)
+
+
+/*!
+ * \brief Dynamic Routing Bit: Follows dynamic Routing.
+ */
+#define DMA_PACKET_DYNAMIC (1)
+
+
+/*!
+ * \brief Torus Hardware Packet Header Constants for Routing: Point to Point.
+ */
+#define DMA_PACKET_POINT2POINT (0)
+
+
+/*!
+ * \brief Torus Hardware Packet Header Constants for Routing: Class.
+ */
+#define DMA_PACKET_CLASS (1)
+
+
+/*!
+ * \brief Torus DMA Hardware Packet Header.
+ *
+ * There are two sections of the packet header: The hardware header
+ * and the software header.
+ *
+ * The same 8-byte hardware header as was used on Blue Gene/L is used
+ * for Blue Gene/P, except that two bits that were previously unused
+ * will be used as follows:
+ *
+ * - The Pid bit on Blue Gene/L indicates the logical destination group.
+ * This determines the reception fifo group a packet ends up in.
+ * This bit is now called Pid0. The new Pid1 bit expands the logical
+ * destination group from two to four. This corresponds to the increase
+ * in cores from two to four.
+ *
+ * - The new Dm bit indicates the DMA mode: Memory fifo or direct.
+ * In memory fifo mode, the DMA receives packets from the torus fifos into
+ * reception fifos located in memory. Then the core copies the payload
+ * from the memory fifo to its final destination. In direct mode, the DMA
+ * moves the packet payload directly from the torus fifos to its final
+ * destination.
+ *
+ * The 8-byte software header was used by the software on Blue Gene/L for
+ * its own purposes. On Blue Gene/P, parts of it are used by the DMA,
+ * depending on the type of DMA transfer being used. The usage of the fields
+ * in the software header is as follows for the typical types of DMA transfers:
+ *
+ * - In memory fifo mode,
+ * - The first 4 bytes of the software header contain the "put offset".
+ * This is the offset from the injection counter's base address, in bytes,
+ * of the memory being transferred in this packet.
+ * - The last 4 bytes of the software header is for use by software.
+ *
+ * - In direct put mode,
+ * - The first 4 bytes of the software header contain the "put offset".
+ * This is the offset from the reception counter's base address, in bytes,
+ * of the memory where the payload in this packet is placed.
+ * - The fifth byte of the software header is the reception counter Id.
+ * - The sixth byte of the software header is the number of valid bytes of
+ * payload in this packet.
+ * - The seventh byte of the software header contains DMA flags. Specifically,
+ * the remote-get flag is 0.
+ * - The last byte of the software header is for use by software.
+ *
+ * - In remote get mode, the payload contains one or more injection descriptors
+ * describing data to be transferred by the DMA. When the DMA receives this
+ * packet, it injects the descriptors into injection fifos to perform the
+ * specified data transfer.
+ * - The first 5 bytes of the software header are for use by software.
+ * - The sixth byte of the software header is the number of valid bytes of
+ * payload in this packet. This will be a multiple of 32, since the payload
+ * consists of one or more 32 byte DMA descriptors.
+ * - The seventh byte of the software header contains DMA flags. Specifically,
+ * the remote-get flag is 1.
+ * - The eighth byte of the software header is the injection fifo Id where
+ * the descriptors in the payload will be injected.
+ *
+ */
+typedef struct DMA_PacketHeader_t
+{
+ /* NOTE(review): the bit-field layout below must match the torus/DMA
+    hardware header exactly; it assumes the toolchain allocates bit-fields
+    MSB-first (PowerPC big-endian convention) -- confirm before reusing
+    this header on any other target. */
+ union {
+ unsigned word0; /*!< First 4 bytes of packet header. */
+
+ struct {
+ unsigned CSum_Skip : 7; /*!< Number of 2 byte units to skip from
+ the top of a packet before including
+ the packet bytes into the running
+ checksum of the torus injection fifo
+ where this packet is injected.
+ */
+
+ unsigned Sk : 1; /*!< Torus injection checksum skip packet
+ bit.
+ - 0 includes the packet (excluding the
+ portion designated by DMA_CSUM_SKIP)
+ in the checksum.
+ - 1 excludes the entire packet from
+ the checksum.
+ */
+
+ unsigned Hint : 6; /*!< Hint bits for torus routing (6 bits).
+ Each bit corresponds to x+, x-, y+, y-,
+ z+, z-. If a bit is set, it indicates
+ that the packet wants to travel along
+ the corresponding direction. If all
+ bits are zero, the hardware calculates
+ the hint bits. Both x+ and x- cannot
+ be set at the same time...same with y
+ and z.
+ */
+
+ unsigned Dp : 1; /*!< Deposit Bit for Class Routed MultiCast.
+ If this bit is set to 1, then as the
+ packet travels along a straight line
+ to its destination, it also deposits
+ a copy of itself into each node as it
+ goes through. This feature must be
+ used only if the packet is set to
+ travel along a straight line.
+ */
+
+ unsigned Pid0 : 1; /*!< Destination Fifo Group Most Significant
+ Bit. (Pid0,Pid1) specifies which of 4
+ reception fifo groups that this packet
+ is destined for.
+ */
+
+ unsigned Chunks : 3; /*!< Size in Chunks of 32B (0 for 1 chunk,
+ ... , 7 for 8 chunks).
+ */
+
+ unsigned Pid1 : 1; /*!< Destination Fifo Group Least
+ significant bit. Refer to Pid0.
+ */
+
+ unsigned Dm : 1; /*!< 1=DMA Mode, 0=Fifo Mode. */
+
+ unsigned Dynamic : 1; /*!< 1=Dynamic Routing,
+ 0=Deterministic Routing.
+ */
+
+ unsigned VC : 2; /*!< Virtual Channel
+ - 0=Dynamic 0
+ - 1=Dynamic 1
+ - 2=Deterministic Bubble Normal
+ - 3=Deterministic Bubble Priority
+ */
+
+ unsigned X : 8; /*!< Destination X Physical Coordinate. */
+
+ }; /* End of individual fields in Word 0 */
+
+ }; /* End of Word 0 */
+
+
+ union {
+
+ unsigned word1; /*!< Second 4 bytes of packet header. */
+
+ struct {
+ unsigned Y : 8; /*!< Destination Y Physical Coordinate. */
+
+ unsigned Z : 8; /*!< Destination Z Physical Coordinate. */
+
+ unsigned Resvd0 : 8; /*!< Reserved (pkt crc). */
+
+ unsigned Resvd1 : 8; /*!< Reserved (pkt crc). */
+
+ }; /* End of individual fields in Word 1 */
+
+ }; /* End of Word 1 */
+
+
+ union {
+
+ unsigned word2; /*!< Third 4 bytes of packet header. */
+
+ unsigned Put_Offset; /*!< For a memory fifo packet, gives a
+ unique ID to each packet in a long
+ message. Derived from the put offset
+ of the torus packet header in the
+ descriptor, and updated for each
+ packet.
+ For a direct-put packet, the rDMA
+ writes the first payload byte to this
+ offset plus the base address
+ corresponding to the rDMA counter ID.
+ */
+
+ unsigned Single_Packet_Parameter; /*!< For a single memory fifo packet,
+ this is essentially unused space
+ that can be used to pass a
+ parameter to the target node.
+ */
+ }; /* End of Word 2 */
+
+
+ union {
+
+ unsigned word3; /*!< Fourth 4 bytes of packet header. */
+
+ struct {
+ unsigned rDMA_Counter : 8; /*!< For a direct-put packet, this is the
+ number of the rDMA counter associated
+ with this packet.
+ */
+
+ unsigned Payload_Bytes : 8; /*!< For a direct-put packet, this is the
+ number of valid bytes in the payload.
+ This is set by the iDMA, based on the
+ message length in the injection
+ descriptor.
+ */
+
+ unsigned Flags : 8; /*!< Flags[6]=Pacing, Flags[7]=Remote-Get.*/
+
+ unsigned iDMA_Fifo_ID : 8; /*!< For a remote-get packet, this is the
+ iDMA fifo ID to be injected during
+ remote-get processing.
+ */
+ };
+
+ struct { /* For memory fifo packets... */
+
+ unsigned SW_Arg : 24; /*!< User-defined. */
+
+ unsigned Func_Id : 8 ; /*!< Function ID for dispatching receiver
+ functions from Polling reception
+ fifos.
+ */
+ };
+
+ }; /* End of Word 3 */
+
+}
+ALIGN_QUADWORD DMA_PacketHeader_t;
+
+
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/DMA_RecFifo.h b/arch/powerpc/include/spi/DMA_RecFifo.h
new file mode 100644
index 00000000000000..20c8b34538d9ff
--- /dev/null
+++ b/arch/powerpc/include/spi/DMA_RecFifo.h
@@ -0,0 +1,1810 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+
+#ifndef _DMA_RECFIFO_H_ /* Prevent multiple inclusion */
+#define _DMA_RECFIFO_H_
+
+
+/*!
+ * \file spi/DMA_RecFifo.h
+ *
+ * \brief DMA SPI Reception Fifo Definitions and Inline Functions
+ *
+ * There are
+ * - 6 normal-priority torus hardware fifos
+ * - 1 high-priority torus hardware fifo
+ * - 1 local memcpy reception fifo
+ * If we assigned a reception fifo to each, there would be 8. These are called
+ * "normal reception fifos".
+ *
+ * There is one reception fifo used to store packet headers only (for debug).
+ * This is called the "header reception fifo".
+ *
+ * The hardware packet header's (Pid0, Pid1) bits specify up to four processors.
+ * There is one set of "normal" and one "header" reception fifos per processor,
+ * called a "reception fifo group". Thus, there are 4 groups.
+ *
+ */
+
+
+#include <common/namespace.h>
+/* #include <common/bgp_ras.h> */
+
+
+__BEGIN_DECLS
+
+
+/*!
+ * \brief __INLINE__ definition
+ *
+ * Option 1:
+ * Make all functions be "static inline":
+ * - They are inlined if the compiler can do it
+ * - If the compiler does not inline it, a single copy of the function is
+ * placed in the translation unit (eg. xxx.c)for use within that unit.
+ * The function is not externalized for use by another unit...we want this
+ * so we don't end up with multiple units exporting the same function,
+ * which would result in linker errors.
+ *
+ * Option 2:
+ * A GNU C model: Use "extern inline" in a common header (this one) and provide
+ * a definition in a .c file somewhere, perhaps using macros to ensure that the
+ * same code is used in each case. For instance, in the header file:
+ *
+ \verbatim
+ #ifndef INLINE
+ # define INLINE extern inline
+ #endif
+ INLINE int max(int a, int b) {
+ return a > b ? a : b;
+ }
+ \endverbatim
+ *
+ * ...and in exactly one source file (in runtime/SPI), that is included in a
+ * library...
+ *
+ \verbatim
+ #define INLINE
+ #include "header.h"
+ \endverbatim
+ *
+ * This allows inlining, where possible, but when not possible, only one
+ * instance of the function is in storage (in the library).
+ */
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+
+#include <spi/DMA_Fifo.h>
+#include <spi/DMA_Assert.h>
+#include <spi/DMA_Packet.h>
+
+
+
+
+/*!
+ * \brief Number of Normal (non-header) Reception Fifos Per Group
+ *
+ * These will have fifo IDs 0 through 7 in a group.
+ *
+ */
+#define DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP 8
+
+
+/*!
+ * \brief Number of Header Reception Fifos Per Group
+ *
+ */
+#define DMA_NUM_HEADER_REC_FIFOS_PER_GROUP 1
+
+
+/*!
+ * \brief Fifo ID of the Header Reception Fifo in a group.
+ *
+ * This will be fifo ID 8 in a group.
+ */
+#define DMA_HEADER_REC_FIFO_ID (DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP)
+
+
+/*!
+ * \brief Number of Reception Fifos Per Group
+ *
+ * 8 Normal + 1 Header
+ */
+#define DMA_NUM_REC_FIFOS_PER_GROUP (DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP + \
+ DMA_NUM_HEADER_REC_FIFOS_PER_GROUP)
+
+
+/*!
+ * \brief Number of Reception Fifo Groups
+ *
+ * One group for each processor, identified by (Pid0,Pid1) in the packet header.
+ */
+#define DMA_NUM_REC_FIFO_GROUPS 4
+
+
+/*!
+ * \brief Total Number of Normal Reception Fifos
+ */
+#define DMA_NUM_NORMAL_REC_FIFOS (DMA_NUM_REC_FIFO_GROUPS * \
+ DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP)
+
+/*!
+ * \brief Total Number of Header Reception Fifos
+ */
+#define DMA_NUM_HEADER_REC_FIFOS (DMA_NUM_REC_FIFO_GROUPS * \
+ DMA_NUM_HEADER_REC_FIFOS_PER_GROUP)
+
+
+/*!
+ * \brief The maximum number of packets that can be processed by a polling
+ * function before it must update the fifo's hardware head. The
+ * polling function can keep track of the head in the va_head shadow
+ * and only update the hardware head when this limit is reached to
+ * reduce overhead.
+ */
+#define DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD 32
+
+
+/*!
+ * \brief Minimum Reception Fifo Size in bytes
+ *
+ * It is important that this size be enough to hold more packets than
+ * DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD. Otherwise, the polling
+ * function may deadlock with the DMA (the DMA considers the fifo full,
+ * but we have actually processed all of the packets).
+ * Additionally, we add 512 bytes to this, since the DMA will only fill
+ * the fifo to fifoSize - 512 bytes.
+ *
+ *
+ */
+#define DMA_MIN_REC_FIFO_SIZE_IN_BYTES (512 + (256 * DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD))
+
+/*!
+ * \brief DMA Reception Fifo Map Structure
+ *
+ * This structure defines the basic reception fifo configuration.
+ * It is common across all reception fifo groups.
+ *
+ * Example 1:
+ * In SMP mode you might have only two reception fifos:
+ * - One for normal-priority torus and local transfers, and
+ * - one for high-priority torus transfers.
+ * In this case, only one fifo group would be needed.
+ *
+ * Example 2:
+ * In virtual node mode, you might have two reception fifos per group (as
+ * described in Example 1), and 4 groups, one for each processor.
+ * All packets with the same (pid0,pid1) bits use the same group.
+*/
+typedef struct DMA_RecFifoMap_t
+{
+ unsigned short int save_headers; /*!< Flag that specifies whether header
+ fifos will be used to store packet
+ headers.
+ - 0 => Do not store headers
+ - 1 => Store headers (debug mode) */
+
+ unsigned int fifo_types[ DMA_NUM_NORMAL_REC_FIFOS ]; /*!< The type of each
+ normal rec fifo. If entry i is
+ - 0, rec fifo i is type 0
+ - 1, rec fifo i is type 1
+
+ For type i fifo, threshold interrupt
+ fires if fifo free space <=
+ threshold[i], in units of 16B quads. */
+
+ unsigned int hdr_fifo_types[ DMA_NUM_HEADER_REC_FIFOS ]; /*!< The type of
+ each header reception fifo. If entry
+ i is
+ - 0, header rec fifo i is type 0
+ - 1, header rec fifo i is type 1
+
+ For type i fifo, threshold interrupt
+ fires if fifo free space <=
+ threshold[i], in units of 16B quads. */
+
+ unsigned int threshold[2]; /*!< For type i fifo, threshold interrupt
+ fires if fifo free space <=
+ threshold[i], in units of 16B quads. */
+
+ unsigned char ts_rec_map[4][8]; /*!< Torus Reception Map.
+ This array contains the rec fifo ID into which torus
+ packets are deposited that originate from
+ - each hardware torus fifo (x+,x-,y+,y-,z+,z-) (0 through 5)
+ - high-priority hardware torus fifo (6)
+ - a local transfer (7)
+
+ for each group (0 through 3).
+
+ For ts_rec_map[i][j],
+ i is the rec fifo group ID, as defined by (pid0,pid1) pair.
+ j identifies the hardware torus fifo (0-5 for
+ x+,x-,y+,y-,z+,z- respectively), high-priority
+ torus fifo (6), and local transfer (7).
+ The value in each array element must be a global fifo ID
+ (between 0 and DMA_NUM_NORMAL_REC_FIFOS-1). That is, the value
+ identifies the normal rec fifo that will receive packets
+ originating at i,j.
+ Note that the global fifo ID must be 0-7 for group 0,
+ 8-15 for group 1, 16-23 for group 2, and 24-31 for group 3.
+
+ This affects DCRS 0xd60 to 0xd67 as defined by the following
+ - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_XY (_BGP_DCR_DMA+0x60)
+ - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_ZHL (_BGP_DCR_DMA+0x61)
+ - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_XY (_BGP_DCR_DMA+0x62)
+ - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_ZHL (_BGP_DCR_DMA+0x63)
+ - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_XY (_BGP_DCR_DMA+0x64)
+ - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_ZHL (_BGP_DCR_DMA+0x65)
+ - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_XY (_BGP_DCR_DMA+0x66)
+ - _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_ZHL (_BGP_DCR_DMA+0x67)
+
+ e.g., for (pid0,pid1) = (0,1)
+ - ts_rec_map[1][0] = fifo id for torus x+ receiver packets
+ - ts_rec_map[1][1] = fifo id for torus x- receiver packets
+ - ts_rec_map[1][2] = fifo id for torus y+ receiver packets
+ - ts_rec_map[1][3] = fifo id for torus y- receiver packets
+ - ts_rec_map[1][4] = fifo id for torus z+ receiver packets
+ - ts_rec_map[1][5] = fifo id for torus z- receiver packets
+ - ts_rec_map[1][6] = fifo id for torus high priority packets
+ - ts_rec_map[1][7] = fifo id for local transfer packets */
+
+} DMA_RecFifoMap_t;
+
+
+/*!
+ * \brief DMA Reception Fifo Status Structure
+ *
+ * Defines the DMA SRAM Status Area for Reception Fifos
+ *
+ */
+typedef struct DMA_RecFifoStatus_t
+{
+ volatile unsigned not_empty[2]; /*!< R bit mask, 1 bit/FIFO:
+ Reception FIFO not empty status.
+ Not_empty[0] contains 1 bit for each
+ of the 32 normal fifos.
+ Each bit corresponds to a
+ global fifo ID.
+ Not_empty[1] :
+ - bit 7 for group 0 header fifo,
+ - bit 15 for group 1 header fifo,
+ - bit 23 for group 2 header fifo,
+ - bit 31 for group 3 header fifo. */
+
+ volatile unsigned available[2]; /*!< R bitmask, 1 bit/FIFO:
+ Reception FIFO available status.
+ Bits are as above for available[0]
+ and available[1]. */
+
+ volatile unsigned threshold_crossed[2]; /*!< R bitmask, 1 bit/FIFO:
+ Threshold crossed status.
+ Bits are as above for
+ threshold_crossed[0] and
+ threshold_crossed[1]. */
+
+ volatile unsigned clear_threshold_crossed[2]; /*!< W bitmask, 1 bit/FIFO:
+ Clear Threshold Crossed Status.
+ Bits are as above for
+ clear_threshold_crossed[0] and
+ clear_threshold_crossed[1]. */
+}
+DMA_RecFifoStatus_t;
+
+
+/*!
+ * \brief Returns the word number that the specified reception fifo is in
+ *
+ * \param[in] global_rec_fifo_id The global ID of the reception fifo
+ * (0 to DMA_NUM_REC_FIFOS-1).
+ *
+ * \return The number of the word that the specified fifo is in (0 or 1).
+ *
+ * Used as an index in the "not_empty", "available", "threshold_crossed", and
+ * "clear_threshold_crossed" fields of the DMA_RecFifoStatus_t structure.
+ *
+ */
+#define DMA_REC_FIFO_GROUP_WORD_ID(global_rec_fifo_id) \
+ ( ((global_rec_fifo_id)>>5) & _BN(31) )
+
+
+/*!
+ * \brief Reception DMA Fifo Structure
+ *
+ * This structure contains a software DMA fifo structure (defined in DMA_Fifo.h)
+ * and other fields that are specific to a reception fifo used by software.
+ */
+typedef struct DMA_RecFifo_t
+{
+ DMA_Fifo_t dma_fifo; /*!< Common software fifo structure */
+
+ unsigned char global_fifo_id; /*!< Global fifo ID:
+ - 0 to DMA_NUM_NORMAL_REC_FIFOS-1
+ for normal fifos,
+ - 32-35 for header fifos. */
+ /*!
+ * \note The following field contains info about the fifo that reflects the
+ * DCR values configuring the fifo.
+ */
+
+ unsigned char type; /*!< 0 or 1 for type of fifo. Selects
+ which threshold[] entry of
+ DMA_RecFifoMap_t applies. */
+
+ /*!
+ * \note The following field is used by the reception fifo polling functions.
+ * It counts the number of packets processed since the fifo's hardware
+ * head was last updated. When DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD
+ * is reached, the hardware head is moved and this counter is reset
+ * to zero. This helps to minimize the number of times the hardware
+ * head is updated, which can be an expensive operation.
+ */
+ unsigned int num_packets_processed_since_moving_fifo_head; /*!< Tracks when
+ to move the
+ fifo head. */
+}
+DMA_RecFifo_t;
+
+
+/*!
+ * \brief DMA Reception Fifo Group Structure
+ *
+ * This structure defines a DMA Reception Fifo Group. It points to a
+ * Reception Fifo Status structure, and contains DMA_NUM_REC_FIFOS_PER_GROUP
+ * Reception Fifo structures.
+ *
+ * It is returned from the DMA_RecFifoGetFifoGroup system call wrapper function
+ * defined in this header file. This same structure must be used by all users
+ * of reception fifos in this group because the fifos will contain packets
+ * destined for these different users, and this structure contains shadows of
+ * the hardware fifos in the DMA SRAM that must be maintained as the fifos
+ * change. This common structure is located in static storage
+ * declared in DMA_RecFifo.c.
+ *
+ */
+typedef struct DMA_RecFifoGroup_t
+{
+ int group_id; /*!< Group ID (0 through DMA_NUM_REC_FIFO_GROUPS-1) */
+
+ int num_normal_fifos; /*!< Number of normal fifos used in this group
+ (0 through DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP) */
+
+ int num_hdr_fifos; /*!< Number of header fifos used in this group
+ - 0 - headers not being saved,
+ - 1 - headers being saved. */
+
+ unsigned mask; /*!< All reads to the status for this group are
+ masked by this, so you only see results for
+ this group.
+ - Group 0: 0xFF000000
+ - Group 1: 0x00FF0000
+ - Group 2: 0x0000FF00
+ - Group 3: 0x000000FF */
+
+ void *unused1; /*!< Unused space */
+
+ DMA_RecFifoStatus_t *status_ptr; /*!< Pointer to the status, in DMA SRAM. */
+
+ DMA_RecFifo_t fifos[DMA_NUM_REC_FIFOS_PER_GROUP]; /*!< Rec Fifos.
+ Indexes 0 through DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1 are
+ the normal fifos in the group.
+ Index DMA_HEADER_REC_FIFO_ID is the header fifo in the
+ group.
+ Note: fifos[0] may not be local fifo number 0 in the group.*/
+}
+ALIGN_L1D_CACHE DMA_RecFifoGroup_t; /* L1 D-cache-line aligned (presumably to
+ avoid false sharing between cores --
+ see common/alignment.h). */
+
+
+/*!
+ * \brief DMA Reception Fifo Receive Function Prototype
+ *
+ * Receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface, which assigns a registration ID
+ * to the function. When the polling functions process a packet in a
+ * reception fifo, the appropriate receive function for that packet is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.
+ *
+ * \param[in] f_ptr Pointer to the reception fifo.
+ * \param[in] packet_ptr Pointer to the packet header (== va_head).
+ * This is quad-aligned for optimized copying.
+ * \param[in] recv_func_parm Pointer to storage specific to this receive
+ * function. This pointer was specified when the
+ * receive function was registered with the kernel,
+ * and is passed to the receive function
+ * unchanged.
+ * \param[in] payload_ptr Pointer to the beginning of the payload.
+ * This is quad-aligned for optimized copying.
+ * \param[in] payload_bytes Number of bytes in the payload. Note that this
+ * may be larger than the number of valid bytes
+ * in the payload because it is rounded such that
+ * it (payload_bytes) + 16 (size of packet header)
+ * is a multiple of 32 bytes. Thus, if the size
+ * of the message is 64, payload_bytes is 80 such
+ * that the total packet size is 96, a multiple of
+ * 32.
+ *
+ * \retval 0 No errors found while processing the payload.
+ * \retval negative_number Errors found while processing the payload.
+ */
+/* Callback signature invoked by the reception fifo polling functions for
+   each packet; see the block comment above for parameter semantics and
+   payload-buffer lifetime rules. */
+typedef int (*DMA_RecFifoRecvFunction_t)(
+ DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ );
+
+
+/*!
+ * \brief DMA Reception Fifo Default Error Receive Function
+ *
+ * This is the default function that will handle packets having an
+ * unregistered registration ID.
+ *
+ * Receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface, which assigns a registration ID
+ * to the function. When the polling functions process a packet in a
+ * reception fifo that has a registration ID that does not have a corresponding
+ * receive function registered, this error receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location.
+ *
+ * \param[in] f_ptr Pointer to the reception fifo.
+ * \param[in] packet_ptr Pointer to the packet header (== va_head).
+ * This is quad-aligned for optimized copying.
+ * \param[in] recv_func_parm Pointer to storage specific to this receive
+ * function. This pointer was specified when the
+ * receive function was registered with the kernel,
+ * and is passed to the receive function
+ * unchanged.
+ * \param[in] payload_ptr Pointer to the beginning of the payload.
+ * This is quad-aligned for optimized copying.
+ * \param[in] payload_bytes Number of bytes in the payload
+ *
+ * \retval -1 An unregistered packet was just processed. This is considered
+ * an error.
+ */
+/* Prototype only -- the implementation is supplied by the SPI library
+   (presumably DMA_RecFifo.c; see the DMA_RecFifoGroup_t note above). */
+int DMA_RecFifoDefaultErrorRecvFunction(
+ DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ );
+
+
+/*!
+ * \brief Set DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that
+ * - Sets DCRs establishing the map between the hardware torus fifos and the
+ * DMA reception fifos that are to receive the packets from those hardware
+ * torus fifos.
+ * - Sets DCRs establishing the DMA reception fifos that are to receive
+ * local transfer packets.
+ * - Sets the DCRs establishing the type (0 or 1) of each reception fifo.
+ * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos.
+ * - Leaves all of the fifos that are used in a "disabled" state.
+ * DMA_RecFifoInitById() initializes and enables the fifos.
+ *
+ * \param[in] rec_map Reception Fifo Map structure, defining the mapping.
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h.
+ * _bgp_err_dma_rfifo_map_twice means the mapping has
+ * already been set.
+ *
+ * \note This function should be called once per job, after DMA_ResetRelease().
+ * It may be called by any core, but once a core has called it, other
+ * calls by that same core or any other core will fail.
+ *
+ * \note During job init, the kernel sets up the DCR clear masks for each
+ * reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear
+ * a fifo in group g only clears group g.
+ *
+ */
+__INLINE__ int DMA_RecFifoSetMap(
+                                 DMA_RecFifoMap_t * rec_map
+                                )
+{
+  /* Thin wrapper: hand the map to the kernel system call and return its
+     result code directly to the caller. */
+  return Kernel_RecFifoSetMap( (uint32_t *)rec_map );
+}
+
+
+/*!
+ * \brief Get DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that returns a DMA
+ * reception fifo map structure, filled in according to the DCRs.
+ *
+ * \param[in,out] rec_map A pointer to a Reception Fifo Map structure
+ * that will be filled-in upon return.
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+__INLINE__ int DMA_RecFifoGetMap(
+                                 DMA_RecFifoMap_t *rec_map
+                                )
+{
+  /* Thin wrapper: ask the kernel to fill in the caller's map structure
+     and return its result code directly. */
+  return Kernel_RecFifoGetMap( (uint32_t *)rec_map );
+}
+
+
+/*!
+ * \brief Get DMA Reception Fifo Group
+ *
+ * This is a wrapper around a System Call. This function returns THE
+ * one-and-only pointer to the fifo group structure, with the entries all
+ * filled in from info in the DCRs. If called multiple times with the same
+ * group, it will always return the same pointer, and the system call will
+ * not be invoked again.
+ *
+ * It must be called AFTER DMA_RecFifoSetMap().
+ *
+ * By convention, the same "target" is used for normal and header fifo
+ * interrupts (could be changed). In addition, by convention, interrupts for
+ * fifos in group g come out of the DMA as non-fatal irq bit 28+g,
+ * ie, only fifos in group g can cause the "type g" threshold interrupts.
+ *
+ * \param[in] grp The group number (0 through DMA_NUM_REC_FIFO_GROUPS).
+ * \param[in] target The core that will receive the interrupt when a
+ * fifo in this group reaches its threshold
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * Ignored on subsequent call with the same group.
+ * \param[in] normal_handler A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * normal fifo in this group reaches its threshold.
+ * This function must be coded to take 4 uint32_t
+ * parameters:
+ * - A pointer to storage specific to this
+ * handler. This is the normal_handler_parm
+ * specified on this function call.
+ * - 3 uint32_t parameters that are not used.
+ * If normal_handler is NULL, threshold interrupts
+ * are not delivered for normal fifos in this group.
+ * Ignored on subsequent call with the same group.
+ * \param[in] normal_handler_parm A pointer to storage that should be passed
+ * to the normal interrupt handling function
+ * (see normal_handler parameter).
+ * Ignored on subsequent call with the same
+ * group.
+ * \param[in] header_handler ** This parameter is deprecated. Specify NULL.**
+ * A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * header fifo in this group reaches its threshold.
+ * This function must be coded to take 2 parameters:
+ * void* A pointer to storage specific to this
+ * handler. This is the header_handler_parm
+ * specified on this function call.
+ * int The global fifo ID of the fifo that hit
+ * its threshold (0 through
+ * NUM_DMA_REC_FIFOS-1).
+ * If header_handler is NULL, threshold interrupts
+ * are not delivered for header fifos in this group.
+ * Ignored on subsequent call with the same group.
+ * \param[in] header_handler_parm ** This parameter is deprecated. Specify
+ * NULL. **
+ * A pointer to storage that should be passed
+ * to the header interrupt handling function
+ * (see header_handler parameter).
+ * Ignored on subsequent call with the same
+ * group.
+ * \param[in] interruptGroup A InterruptGroup_t that identifies the
+ * group of interrupts that the fifos in this group
+ * will become part of.
+ * Ignored on subsequent call with the same group.
+ *
+ * \return RecFifoGroupStruct Pointer to a DMA Reception Fifo Group structure
+ * that reflects the fifos that are being used in
+ * this group. This same structure is shared by
+ * all users of this reception fifo group.
+ * NULL is returned if an error occurs.
+ */
+/* Declaration only (not inline); see the Doxygen block above for the contract. */
+DMA_RecFifoGroup_t *
+DMA_RecFifoGetFifoGroup(
+			int                       grp,
+			int                       target,
+			Kernel_CommThreadHandler  normal_handler,
+			void                     *normal_handler_parm,
+			Kernel_CommThreadHandler  header_handler,
+			void                     *header_handler_parm,
+			Kernel_InterruptGroup_t   interruptGroup
+		       );
+
+
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Calls to access the Fifo, given a reception fifo structure
+ * -----------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief Increment DMA Reception Fifo Head
+ *
+ * Increment a DMA reception fifo's "head", given a reception fifo structure
+ *
+ * \param[in] f_ptr Pointer to the reception fifo structure
+ * \param[in] incr The number of quads (16 byte units) to increment the
+ * head pointer by.
+ *
+ * \return None
+ *
+ * \post va_head is set in both the hardware and software fifo structures,
+ * and the fifo free space is recalculated.
+ *
+ */
+__INLINE__ void DMA_RecFifoIncrementHead(
+                                         DMA_RecFifo_t *f_ptr,
+                                         unsigned int   incr
+                                        )
+{
+  SPI_assert( f_ptr != NULL );
+
+  {
+    /* Current head and fifo end, taken from the software shadow. */
+    unsigned int head    = (unsigned)DMA_FifoGetHeadNoFreeSpaceUpdate( &f_ptr->dma_fifo );
+    unsigned int end     = (unsigned)DMA_FifoGetEndFromShadow( &f_ptr->dma_fifo );
+    unsigned int advance = incr << 4;   /* quads (16B units) -> bytes */
+    unsigned int room    = end - head;  /* bytes remaining before the end */
+
+    /*
+     * Wrap when the advance reaches the end.  The check is ">=" (not ">")
+     * because the head must never equal the end, so that a quad can always
+     * be copied from the head safely.
+     */
+    if ( advance >= room )
+    {
+      head = (unsigned)DMA_FifoGetStartFromShadow( &f_ptr->dma_fifo ) +
+             ( advance - room );
+    }
+    else
+    {
+      head += advance;
+    }
+
+    /* Publish the new head; this also recalculates the fifo free space. */
+    DMA_FifoSetHead( &f_ptr->dma_fifo,
+                     (void *)head );
+  }
+}
+
+
+/*!
+ * \brief Get the "Not Empty" Status of a Reception Fifo Group
+ *
+ * Get the "Not Empty" status of the reception fifos that are being used in the
+ * specified "not empty" word.
+ *
+ * \param[in] fg_ptr Pointer to the reception fifo group structure
+ * \param[in] word The word (0 or 1) of the "not empty" status to be
+ * returned.
+ *
+ * \retval notEmptyStatus A 32-bit value:
+ * If "word" is 0, bit i is 1 if normal rec fifo i is
+ * in use and is not empty.
+ * If "word" is 1, bits 7, 15, 23, 31 are 1 if header
+ * rec fifos for groups 1, 2, 3, 4 respectively are in
+ * use and not empty.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetNotEmpty(
+                                           DMA_RecFifoGroup_t *fg_ptr,
+                                           int                 word
+                                          )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (word == 0) || (word == 1) );
+
+  /* Mask the raw status word down to the fifos this group actually uses. */
+  return fg_ptr->mask & fg_ptr->status_ptr->not_empty[word];
+}
+
+
+/*!
+ * \brief Get the "Available" Status of a Reception Fifo Group
+ *
+ * Get the "available" status of the reception fifos that are being used in the
+ * specified "available" word.
+ *
+ * \param[in] fg_ptr Pointer to the reception fifo group structure
+ * \param[in] word The word (0 or 1) of the "available" status to be
+ * returned.
+ *
+ * \retval availableStatus A 32-bit value:
+ * If "word" is 0, bit i is 1 if normal rec fifo i is
+ * in use and is available.
+ * If "word" is 1, bits 7, 15, 23, 31 are 1 if header
+ * rec fifos for groups 1, 2, 3, 4 respectively are in
+ * use and available.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetAvailable(
+                                            DMA_RecFifoGroup_t *fg_ptr,
+                                            int                 word
+                                           )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (word == 0) || (word == 1) );
+
+  /* Mask the raw status word down to the fifos this group actually uses. */
+  return fg_ptr->mask & fg_ptr->status_ptr->available[word];
+}
+
+
+/*!
+ * \brief Get the "Threshold Crossed" Status of a Reception Fifo Group
+ *
+ * Get the "threshold crossed" status of the reception fifos that are being
+ * used in the specified "threshold crossed" word.
+ *
+ * \param[in] fg_ptr Pointer to the reception fifo group structure
+ * \param[in] word The word (0 or 1) of the "threshold crossed" status
+ * to be returned.
+ *
+ * \retval thresholdCrossedStatus A 32-bit value:
+ * If "word" is 0, bit i is 1 if normal rec fifo i is
+ * in use and its threshold has been crossed.
+ * If "word" is 1, bits 7, 15, 23, 31 are 1 if header
+ * rec fifos for groups 1, 2, 3, 4 respectively are in
+ * use and their threshold has been crossed.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetThresholdCrossed(
+                                                   DMA_RecFifoGroup_t *fg_ptr,
+                                                   int                 word
+                                                  )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (word == 0) || (word == 1) );
+
+  /* Mask the raw status word down to the fifos this group actually uses. */
+  return fg_ptr->mask & fg_ptr->status_ptr->threshold_crossed[word];
+}
+
+
+/*!
+ * \brief Set the "Clear Threshold Crossed" Status of Specified Fifos in a
+ * Reception Fifo Group
+ *
+ * Set the "clear threshold crossed" status of the specified reception fifos
+ * in the specified "clear threshold crossed" word.
+ *
+ * \param[in] fg_ptr Pointer to the reception fifo group structure
+ * \param[in] clr 32-bit value, specifying which fifos are to have
+ * their "clear threshold crossed" status set.
+ * If "word" is 0, bit i is 1 if normal rec fifo i is
+ * to have its "clear threshold crossed" status set.
+ * If "word" is 1, one of bits 7, 15, 23, 31 is 1 if
+ * header fifo for group 1, 2, 3, 4 respectively is to
+ * have its "clear threshold crossed" status set.
+ * Fifos that are not in the group will not have their
+ * status set.
+ * \param[in] word The word (0 or 1) of the "clear threshold crossed"
+ * status to be set.
+ *
+ * \return None
+ *
+ * \note This function does an MBAR after setting the status to ensure the
+ * writes have been accepted by the memory system before allowing other
+ *       memory accesses to occur.
+*/
+__INLINE__ void DMA_RecFifoSetClearThresholdCrossed(
+                                                    DMA_RecFifoGroup_t *fg_ptr,
+                                                    unsigned int        clr,
+                                                    int                 word
+                                                   )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( fg_ptr->status_ptr != NULL );
+  SPI_assert( (word == 0) || (word == 1) );
+
+  /* Restrict the request to fifos that belong to this group. */
+  fg_ptr->status_ptr->clear_threshold_crossed[word] = fg_ptr->mask & clr;
+
+  /* Ensure the store reaches the memory system before later accesses. */
+  _bgp_mbar();
+}
+
+
+/*
+ * -----------------------------------------------------------------------------
+ * Calls to access the Fifo, given a fifo group and a fifo ID
+ * -----------------------------------------------------------------------------
+ */
+
+
+
+
+/*!
+ * \brief DMA RecFifo Initialization By Id
+ *
+ * - For a DMA reception fifo, initialize its start, head, tail, and end.
+ * - Compute fifo size and free space.
+ *
+ * \param[in] fg_ptr Pointer to fifo group structure.
+ * \param[in] fifo_id Id of the fifo to be initialized
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in] va_start Virtual address of the start of the fifo.
+ * \param[in] va_head Virtual address of the head of the fifo (typically
+ * equal to va_start).
+ * \param[in] va_end Virtual address of the end of the fifo.
+ *
+ * \retval 0 Successful.
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h.
+ * _bgp_err_dma_rfifo_map_twice means this fifo has
+ * already been initialized
+ *
+ */
+__INLINE__ int DMA_RecFifoInitById(
+                                   DMA_RecFifoGroup_t *fg_ptr,
+                                   int                 fifo_id,
+                                   void               *va_start,
+                                   void               *va_head,
+                                   void               *va_end
+                                  )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+  SPI_assert( ( (uint32_t)va_end - (uint32_t)va_start ) >=
+	        DMA_MIN_REC_FIFO_SIZE_IN_BYTES );
+
+  /* Reception fifo initialization is owned by the kernel; delegate via
+   * syscall and return its result directly.
+   */
+  return Kernel_RecFifoInitById( (uint32_t*)fg_ptr,
+                                 fifo_id,
+                                 va_start,
+                                 va_head,
+                                 va_end );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Start Pointer from the Shadow Using a Fifo Id
+ *
+ * Get a DMA reception fifo's start pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval va_start The virtual address of the start of the fifo
+ *
+ */
+__INLINE__ void * DMA_RecFifoGetStartById(
+                                          DMA_RecFifoGroup_t *fg_ptr,
+                                          int                 fifo_id
+                                         )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* Delegate to the generic fifo accessor for this group member. */
+  return DMA_FifoGetStartFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Head Pointer Using a Fifo Id
+ *
+ * Get a DMA reception fifo's head pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval va_head The virtual address of the head of the fifo
+ *
+ */
+__INLINE__ void * DMA_RecFifoGetHeadById(
+                                         DMA_RecFifoGroup_t *fg_ptr,
+                                         int                 fifo_id
+                                        )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* Delegate to the generic fifo accessor for this group member. */
+  return DMA_FifoGetHead( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Tail Pointer Using a Fifo Id
+ *
+ * Get a DMA reception fifo's tail pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval va_tail The virtual address of the tail of the fifo
+ *
+ */
+__INLINE__ void *DMA_RecFifoGetTailById(
+                                        DMA_RecFifoGroup_t *fg_ptr,
+                                        int                 fifo_id
+                                       )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* Delegate to the generic fifo accessor for this group member. */
+  return DMA_FifoGetTail( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo End Pointer from the Shadow Using a Fifo Id
+ *
+ * Get a DMA reception fifo's end pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval va_end The virtual address of the end of the fifo
+ *
+ */
+__INLINE__ void *DMA_RecFifoGetEndById(
+                                       DMA_RecFifoGroup_t *fg_ptr,
+                                       int                 fifo_id
+                                      )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* Delegate to the generic fifo accessor for this group member. */
+  return DMA_FifoGetEndFromShadow( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Size Using a Fifo Id
+ *
+ * Get a DMA reception fifo's size, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval size The size of the DMA fifo, in units of 16B quads.
+ *
+ */
+__INLINE__ unsigned int DMA_RecFifoGetSizeById(
+                                               DMA_RecFifoGroup_t *fg_ptr,
+                                               int                 fifo_id
+                                              )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* Size is reported in units of 16-byte quads. */
+  return DMA_FifoGetSize( &fg_ptr->fifos[fifo_id].dma_fifo );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Free Space Using a Fifo Id
+ *
+ * Get a DMA reception fifo's free space, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in] read_head Indicates whether to read the head from the hardware
+ * fifo before calculating the free space.
+ * - 1 means to read the hardware head
+ * - 0 means to use the current head shadow
+ * \param[in] read_tail Indicates whether to read the tail from the hardware
+ * fifo before calculating the free space.
+ * - 1 means to read the hardware tail
+ * - 0 means to use the current tail shadow
+ *
+ * \retval freeSpace The amount of free space in the fifo, in units of
+ * 16B quads.
+ *
+ */
+__INLINE__ unsigned int DMA_RecFifoGetFreeSpaceById(
+                                                    DMA_RecFifoGroup_t *fg_ptr,
+                                                    int                 fifo_id,
+                                                    unsigned int        read_head,
+                                                    unsigned int        read_tail
+                                                   )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* read_head / read_tail select hardware reads vs. shadow values. */
+  return DMA_FifoGetFreeSpace( &fg_ptr->fifos[fifo_id].dma_fifo,
+                               read_head, read_tail );
+}
+
+
+/*!
+ * \brief Set DMA RecFifo Head Pointer Using a Fifo Id
+ *
+ * Set a DMA reception fifo's head pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in] va_head The virtual address of the head of the fifo.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_RecFifoSetHeadById(
+                                       DMA_RecFifoGroup_t *fg_ptr,
+                                       int                 fifo_id,
+                                       void               *va_head
+                                      )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* Delegate to the generic fifo accessor for this group member. */
+  DMA_FifoSetHead( &fg_ptr->fifos[fifo_id].dma_fifo, va_head );
+}
+
+
+/*!
+ * \brief Set DMA RecFifo Tail Pointer Using a Fifo Id
+ *
+ * Set a DMA reception fifo's tail pointer, given a fifo group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in] va_tail The virtual address of the tail of the fifo.
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_RecFifoSetTailById(
+                                       DMA_RecFifoGroup_t *fg_ptr,
+                                       int                 fifo_id,
+                                       void               *va_tail
+                                      )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* Delegate to the generic fifo accessor for this group member. */
+  DMA_FifoSetTail( &fg_ptr->fifos[fifo_id].dma_fifo, va_tail );
+}
+
+
+/*!
+ * \brief Increment DMA RecFifo Head Pointer Using a Fifo Id
+ *
+ * Increment a DMA reception fifo's head pointer, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in] incr The number of quads (16 byte units) to increment the
+ * head pointer by.
+ *
+ * \return None
+ *
+*/
+__INLINE__ void DMA_RecFifoIncrementHeadById(
+                                             DMA_RecFifoGroup_t *fg_ptr,
+                                             int                 fifo_id,
+                                             unsigned int        incr
+                                            )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  /* Advance the head by incr quads (16-byte units), wrapping as needed. */
+  DMA_RecFifoIncrementHead( &fg_ptr->fifos[fifo_id], incr );
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Not Empty Status Using a Fifo Id
+ *
+ * Get a DMA reception fifo's not empty status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval 0 The specified fifo is empty.
+ * non-zero The specified fifo is not empty.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetNotEmptyById(
+                                               DMA_RecFifoGroup_t *fg_ptr,
+                                               int                 fifo_id
+                                              )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  {
+    int      gid    = fg_ptr->fifos[fifo_id].global_fifo_id;
+    int      word   = DMA_REC_FIFO_GROUP_WORD_ID( gid );
+    unsigned status = DMA_RecFifoGetNotEmpty( fg_ptr, word );
+
+    /*
+     * Word 0 covers all normal fifos, so isolate this fifo's bit.  Word 1
+     * was already masked down to the single header-fifo bit for this
+     * group, so no further masking is needed.
+     */
+    return ( word == 0 ) ? ( status & _BN( gid ) ) : status;
+  }
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Available Status Using a Fifo Id
+ *
+ * Get a DMA reception fifo's available status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval 0 The specified fifo is not available.
+ * non-zero The specified fifo is available.
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetAvailableById(
+                                                DMA_RecFifoGroup_t *fg_ptr,
+                                                int                 fifo_id
+                                               )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  {
+    int      gid    = fg_ptr->fifos[fifo_id].global_fifo_id;
+    int      word   = DMA_REC_FIFO_GROUP_WORD_ID( gid );
+    unsigned status = DMA_RecFifoGetAvailable( fg_ptr, word );
+
+    /*
+     * Word 0 covers all normal fifos, so isolate this fifo's bit.  Word 1
+     * was already masked down to the single header-fifo bit for this
+     * group, so no further masking is needed.
+     */
+    return ( word == 0 ) ? ( status & _BN( gid ) ) : status;
+  }
+}
+
+
+/*!
+ * \brief Get DMA RecFifo Threshold Crossed Status Using a Fifo Id
+ *
+ * Get a DMA reception fifo's threshold crossed status, given a fifo group and
+ * fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \retval 0 The specified fifo has not had its threshold crossed
+ * non-zero The specified fifo has had its threshold crossed
+ *
+ */
+__INLINE__ unsigned DMA_RecFifoGetThresholdCrossedById(
+                                                       DMA_RecFifoGroup_t *fg_ptr,
+                                                       int                 fifo_id
+                                                      )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  {
+    int      gid    = fg_ptr->fifos[fifo_id].global_fifo_id;
+    int      word   = DMA_REC_FIFO_GROUP_WORD_ID( gid );
+    unsigned status = DMA_RecFifoGetThresholdCrossed( fg_ptr, word );
+
+    /*
+     * Word 0 covers all normal fifos, so isolate this fifo's bit.  Word 1
+     * was already masked down to the single header-fifo bit for this
+     * group, so no further masking is needed.
+     */
+    return ( word == 0 ) ? ( status & _BN( gid ) ) : status;
+  }
+}
+
+
+/*!
+ * \brief Set DMA RecFifo Clear Threshold Crossed Status Using a Fifo Id
+ *
+ * Set a DMA reception fifo's "clear threshold crossed" status, given a fifo
+ * group and fifo id.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group structure
+ * \param[in] fifo_id Id of the fifo within the group
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ *
+ * \return None
+ *
+ */
+__INLINE__ void DMA_RecFifoSetClearThresholdCrossedById(
+                                                        DMA_RecFifoGroup_t *fg_ptr,
+                                                        int                 fifo_id
+                                                       )
+{
+  SPI_assert( fg_ptr != NULL );
+  SPI_assert( (fifo_id >= 0) && (fifo_id < DMA_NUM_REC_FIFOS_PER_GROUP) );
+
+  {
+    int          gid  = fg_ptr->fifos[fifo_id].global_fifo_id;
+    int          word = DMA_REC_FIFO_GROUP_WORD_ID( gid );
+    unsigned int clr;
+
+    if ( word != 0 )
+    {
+      /* Header fifo: clearing the group's whole mask is safe, since only
+       * one of its bits is used inside the DMA.
+       */
+      clr = fg_ptr->mask;
+    }
+    else
+    {
+      /* Normal fifo: clear only this fifo's bit.  ANDing with the group
+       * mask should be redundant, but is a bit safer.
+       */
+      clr = fg_ptr->mask & _BN( gid );
+    }
+
+    /* Write to the DMA SRAM. */
+    DMA_RecFifoSetClearThresholdCrossed( fg_ptr, clr, word );
+  }
+}
+
+
+/*!
+ * \brief Register a Reception Fifo Receive Function
+ *
+ * Register a specified receive function to handle packets having a specific
+ * "registration ID". It returns a registration ID (0-255) that is to be used
+ * in the packet header Func_Id field, such that packets that arrive in a
+ * reception fifo will result in the corresponding receive function being called
+ * when that fifo is processed by a polling or interrupt handler function.
+ *
+ * \param[in] recv_func Pointer to the receive function.
+ * \param[in] recv_func_parm Arbitrary pointer to be passed to the
+ * recv_func when it is called.
+ * \param[in] is_error_function 1 means this is the receiver function
+ * to be called if a packet contains an invalid
+ * (unregistered) registration ID. The return
+ * value from this function is zero, indicating
+ * success, not indicating a registration ID.
+ * A default function is provided if one is not
+ * registered. If there is already a non-default
+ * error receive function registered, -EBUSY is
+ * returned.
+ * 0 means this is not the error receiver
+ * function.
+ * \param[in] is_header_fifo Indicates whether the fifo is normal or
+ * header:
+ * - 0 is normal. The return code is the
+ * registration ID.
+ * - 1 is header. The return code is 0,
+ * indicating success, because packets in
+ * header fifos are direct-put packets, and
+ * hence have no registration ID.
+ * If there is already a header receive function
+ * registered, -EBUSY is returned.
+ *
+ * If both is_error_function and is_header_fifo are 1, -EINVAL is returned.
+ *
+ * \retval 0 This is a registration ID if is_error_function=0 and
+ * is_header_fifo=0. Otherwise, it indicates success.
+ * 1-255 This is a registration ID. Successful.
+ * negative Failure. This is a negative errno value.
+ */
+/* Declaration only (not inline); see the Doxygen block above for the contract. */
+int  DMA_RecFifoRegisterRecvFunction(
+				     DMA_RecFifoRecvFunction_t  recv_func,
+				     void                      *recv_func_parm,
+				     int                        is_error_function,
+				     int                        is_header_fifo
+				    );
+
+
+/*!
+ * \brief De-Register a Reception Fifo Receive Function
+ *
+ * De-register a previously registered receive function.
+ *
+ * \param[in] registrationId Registration Id returned from
+ * DMA_RecFifoRegisterRecvFunction (0..255).
+ * A negative value means that no
+ * registration id is specified.
+ * \param[in] is_error_function 1 means the error receive function is
+ * to be de-registered.
+ * 0 otherwise.
+ * \param[in] is_header_fifo 1 means the header fifo receive function is
+ * to be de-registered.
+ * 0 otherwise.
+ *
+ * \retval 0 Success
+ * negative Error value
+ *
+ * \see DMA_RecFifoRegisterRecvFunction
+ */
+/* Declaration only (not inline); see the Doxygen block above for the contract. */
+int  DMA_RecFifoDeRegisterRecvFunction(
+				       int  registrationId,
+				       int  is_error_function,
+				       int  is_header_fifo
+				      );
+
+
+/*!
+ * \brief Poll Normal Reception Fifos
+ *
+ * Poll the "normal" reception fifos in the specified fifo group, removing one
+ * packet after another from the fifos, dispatching the appropriate receive
+ * function for each packet, until one of the following occurs:
+ * 1. Total_packets packets are received
+ * 2. All the fifos are empty
+ * 3. A receive function returns a non-zero value
+ * 4. The last packet removed from a fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ *
+ * Polling occurs in a round-robin fashion through the array of normal fifos in
+ * the group, beginning with array index starting_index. If a fifo has a packet,
+ * the appropriate receive function is called. Upon return, the packet is
+ * removed from the fifo (the fifo head is moved past the packet).
+ *
+ * After processing packets_per_fifo packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed. When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] total_packets The maximum number of packets that will be
+ * processed.
+ * \param[in] packets_per_fifo The maximum number of packets that will be
+ * processed in a given fifo before switching
+ * to the next fifo.
+ * \param[in] starting_index The fifos in the fifo group are maintained
+ * in an array. This is the array index of the
+ * first fifo to be processed (0 through
+ * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in] num_empty_passes The number of passes over the normal fifos
+ * while they are empty that this function
+ * should tolerate before giving up and
+ * returning. This is an optimization
+ * to catch late arriving packets.
+ * \param[in] not_empty_poll_delay The number of pclks to delay between polls
+ * of the not-empty status when the fifos are
+ * empty.
+ * \param[in] fg_ptr Pointer to the fifo group.
+ * \param[out] next_fifo_index Pointer to an int where the recommended
+ * starting_index for the next call is returned.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ * next_fifo_index is set.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end. next_fifo_index is
+ * set.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ * \todo By setting fg_ptr->interrupt_lock? or by calling
+ * the system call to disable a class of interrupts?
+ *
+ * \note next_fifo_index is set to the index of the fifo that had the last
+ * packet received if all packets_per_fifo packets were not received from
+ * that fifo. However, if all packets_per_fifo packets were received
+ * from that fifo, the index of the next fifo will be returned.
+ *
+ */
+/* Declaration only (not inline); see the Doxygen block above for the contract. */
+int  DMA_RecFifoPollNormalFifos(int                 total_packets,
+				int                 packets_per_fifo,
+				int                 starting_index,
+				int                 num_empty_passes,
+				int                 not_empty_poll_delay,
+				DMA_RecFifoGroup_t *fg_ptr,
+				int                *next_fifo_index
+			       );
+
+
+/*!
+ * \brief Simple Poll Normal Reception Fifos
+ *
+ * Poll the "normal" reception fifos in the specified fifo group, removing one
+ * packet after another from the fifos, dispatching the appropriate receive
+ * function for each packet, until one of the following occurs:
+ * 1. All packets in all of the fifos have been received.
+ * 2. A receive function returns a non-zero value.
+ * 3. The last packet removed from a fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ * 4. There have been fruitfulPollLimit polls attempted (summed across all
+ * fifos).
+ *
+ * Polling occurs in a round-robin fashion through the array of normal fifos in
+ * the group. If a fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing all of the packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed. When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur until all
+ * fifos are empty or fruitfulPollLimit polls have been completed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed. This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group.
+ * \param[in] fruitfulPollLimit The limit on the number of fruitful polls that
+ * will be attempted (summed across all fifos).
+ * If the limit is reached, this function
+ * returns. A value of zero means there is no
+ * limit imposed. A fruitful poll is one where
+ * at least one packet has arrived in the fifo
+ * since the last poll.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ *
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ *
+ */
+int DMA_RecFifoSimplePollNormalFifos( DMA_RecFifoGroup_t *fg_ptr,
+ int fruitfulPollLimit);
+
+/*!
+ * \brief Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1. num_packets packets are received
+ * 2. The specified fifo is empty
+ * 3. A receive function returns a non-zero value
+ * 4. The last packet removed from the fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] num_packets The maximum number of packets that will be
+ * processed.
+ * \param[in] fifo_id The ID of the fifo to be polled.
+ * (0 through
+ * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in] num_empty_passes The number of passes over the fifo
+ * while it is empty that this function
+ * should tolerate before giving up and
+ * returning. This is an optimization
+ * to catch late arriving packets.
+ * \param[in] not_empty_poll_delay The number of pclks to delay between polls
+ * of the not-empty status when the fifos are
+ * empty.
+ * \param[in] fg_ptr Pointer to the fifo group.
+ *
+ * \param[in] empty_callback Function to call when spinning because the FIFO looks empty.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ * \todo By setting fg_ptr->interrupt_lock? or by calling
+ * the system call to disable a class of interrupts?
+ *
+ */
+int DMA_RecFifoPollNormalFifoById( int num_packets,
+ int fifo_id,
+ int num_empty_passes,
+ int not_empty_poll_delay,
+ DMA_RecFifoGroup_t *fg_ptr,
+ void (*empty_callback)(void)
+ );
+
+
+/*!
+ * \brief Simple Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1. All packets in the fifo have been received.
+ * 2. The specified fifo is empty.
+ * 3. A receive function returns a non-zero value.
+ * 4. The last packet removed from the fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ * 5. There have been fruitfulPollLimit polls attempted.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing all of the packets in the fifo (emptying that fifo),
+ * or the fruitfulPollLimit has been reached, the function returns the number
+ * of packets processed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed. This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always pass a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function has
+ * to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] fifo_id The ID of the fifo to be polled.
+ * (0 through
+ * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in] fg_ptr Pointer to the fifo group.
+ * \param[in] fruitfulPollLimit The limit on the number of fruitful polls that
+ * will be attempted.
+ * If the limit is reached, this function
+ * returns. A value of zero means there is no
+ * limit imposed. A fruitful poll is one where
+ * at least one packet has arrived in the fifo
+ * since the last poll.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ *
+ */
+int DMA_RecFifoSimplePollNormalFifoById( int fifo_id,
+ DMA_RecFifoGroup_t *fg_ptr,
+ int fruitfulPollLimit
+ );
+
+
+
+/*!
+ * \brief Poll Header Reception Fifo Given a Fifo Group
+ *
+ * Poll the "header" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1. Total_packets packets are received
+ * 2. The specified fifo is empty
+ * 3. A receive function returns a non-zero value
+ *
+ * If the header fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ *
+ * The receive function must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header. The packet header is always
+ * 16 bytes of contiguous storage, in the fifo. When the
+ * receive function returns, user code cannot assume that the buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header is 16-byte aligned
+ * for optimized copying.
+ *
+ * \param[in] num_packets The maximum number of packets that will be
+ * processed.
+ * \param[in] num_empty_passes The number of passes over the fifo
+ * while it is empty that this function
+ * should tolerate before giving up and
+ * returning. This is an optimization
+ * to catch late arriving packets.
+ * \param[in] not_empty_poll_delay The number of pclks to delay between polls
+ * of the not-empty status when the fifos are
+ * empty.
+ * \param[in] fg_ptr Pointer to the fifo group.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ *
+ */
+int DMA_RecFifoPollHeaderFifo( int num_packets,
+ int num_empty_passes,
+ int not_empty_poll_delay,
+ DMA_RecFifoGroup_t *fg_ptr
+ );
+
+
+
+__END_DECLS
+
+
+#endif
diff --git a/arch/powerpc/include/spi/bpcore_interface.h b/arch/powerpc/include/spi/bpcore_interface.h
new file mode 100644
index 00000000000000..59e0ee2a22d3bf
--- /dev/null
+++ b/arch/powerpc/include/spi/bpcore_interface.h
@@ -0,0 +1,41 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file spi/bpcore_interface.h
+ */
+#ifndef _BGP_BPCORE_INT_H_ /* Prevent multiple inclusion */
+#define _BGP_BPCORE_INT_H_
+
+#define _BGP_UA_SCRATCH (0x4) /* eDRAM Scratch: 0 to 8MB */
+#define _BGP_PA_SCRATCH (0x00000000)
+#define _BGP_PS_SCRATCH (8 * 1024 * 1024)
+#define _BGP_PM_SCRATCH (0x007FFFFF)
+
+/* ************************************************************************* */
+/* DMA Non-Fatal Interrupt Request: Group 3 bits 00:31 */
+/* ************************************************************************* */
+
+#define _BGP_IC_DMA_NFT_G3_HIER_POS 3
+#define _BGP_IC_DMA_NFT_G3_UNIT_NUM 3
+#define _BGP_IC_DMA_NFT_G3_UNIT_POS 0
+#define _BGP_IC_DMA_NFT_G3_UNIT_SIZE 32
+#define _BGP_IC_DMA_NFT_G3_UNIT_MASK 0xffffffff
+
+
+#endif /* Add nothing below this line */
diff --git a/arch/powerpc/include/spi/kernel_interface.h b/arch/powerpc/include/spi/kernel_interface.h
new file mode 100644
index 00000000000000..05ddd30ffd6cb1
--- /dev/null
+++ b/arch/powerpc/include/spi/kernel_interface.h
@@ -0,0 +1,1982 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/**
+ * \file spi/kernel_interface.h
+ */
+
+#ifndef _BGP_VIRT2PHYS_H_ /* Prevent multiple inclusion */
+#define _BGP_VIRT2PHYS_H_
+
+
+#if defined(__LINUX__) || defined(__LINUX_KERNEL__)
+
+#include <spi/linux_interface.h>
+
+/* #warning Using LINUX kernel interface for SPI */
+
+#else
+
+#warning Using CNK kernel interface for SPI
+#error Should not be using CNK interface, this is in the Linux kernel tree
+
+
+#include <common/namespace.h>
+
+__BEGIN_DECLS
+
+#include <common/linkage.h>
+#include <bpcore/bgp_types.h>
+#include <bpcore/ppc450_core.h>
+#include <bpcore/ppc450_inlines.h>
+#include <spi/bpcore_interface.h>
+#include <spi/bgp_kernel_inlines.h>
+#include <common/bgp_ras.h>
+#include <cnk/VirtualMap.h>
+#include <cnk/vmm.h>
+#include <cnk/bgp_SPRG_Usage.h>
+#include <cnk/bgp_SysCall_Extensions.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <errno.h>
+
+
+#if ((!defined(__CNK__)) && (!defined(__BL__)))
+#include <pthread.h>
+#endif
+
+#ifndef __INLINE__
+#define __INLINE__ extern inline
+#endif
+
+
+
+/*!
+ * \brief Communication Thread interrupt handler function prototype
+ *
+ * \param[in] arg1 1st argument to commthread
+ * \param[in] arg2 2nd argument to commthread
+ * \param[in] arg3 3rd argument to commthread
+ * \param[in] arg4 4th argument to commthread
+ */
+typedef void (*Kernel_CommThreadHandler)(uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4);
+
+/*!
+ * \brief Interrupt Group Prototype
+ *
+ * This data type is used to group interrupts of various devices together
+ * so they can be enabled or disabled simultaneously. A given interrupt user
+ * (eg. messaging, QCD, etc) specifies a value of this data type when its
+ * interrupt resources are allocated. The kernel associates those resources
+ * with the specified value so when this value is specified on the enable or
+ * disable interrupts system call, all of the interrupts in the group are
+ * operated upon. Examples of devices that can be grouped in this way include
+ * DMA fifos, torus, tree, etc.
+ *
+ * \todo The kernel should provide interfaces to allocate a
+ * Kernel_InterruptGroup_t and deallocate it.
+ */
+typedef void * Kernel_InterruptGroup_t;
+
+
+
+
+
+/*! \brief Returns the number of Processes (Virtual Nodes) running on this Physical Node.
+ *
+ * \return Process Count
+ * \retval 1 Running in Single Process "SMP Mode"
+ * \retval 2 Running in "2 Virtual Node Mode"
+ * \retval 3 Running in "3 Virtual Node Mode"
+ * \retval 4 Running in "4 Virtual Node Mode"
+ */
+/* __INLINE__ int Kernel_ProcessCount( void )
+{
+ uint32_t shm = _bgp_mfspr( _BGP_SPRGRO_SHMem );
+
+ return( (shm & 0x3) + 1 );
+}
+*/
+/*! \brief Returns the number of Processors (cores) running in this Process (Virtual Node)
+ *
+ * \return Processor Count
+ * \retval 1 Single Processor in this Process (usually 4-VN Mode).
+ * \retval 2 Two Processors in this Process (usually 2-VN Mode).
+ * \retval 3 Three Processors in this Process.
+ * \retval 4 Four Processors in this Process (usually SMP Mode).
+ */
+/* __INLINE__ int Kernel_ProcessorCount( void )
+{
+ uint32_t shm = _bgp_mfspr( _BGP_SPRGRO_SHMem );
+
+ return( ((shm & 0xC) >> 2) + 1 );
+}
+*/
+__INLINE__ int Kernel_GetAppSegmentCount(uint32_t* count)
+{
+  _BGP_SprgShMem mapinfo;
+  uint32_t nsegs;
+
+  /* The shared-memory SPRG encodes whether the application runs with a
+     static memory map. */
+  mapinfo.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem);
+
+  /* Static map: text/rodata, data, heap are separate segments (3).
+     Dynamic map: text/rodata plus a combined data/heap segment (2). */
+  nsegs = mapinfo.IsStaticMap ? 3 : 2;
+
+  /* Dual/VN modes (more than one process per node) add a shared segment. */
+  if (Kernel_ProcessCount() != 1)
+  {
+     nsegs++;
+  }
+
+  *count = nsegs;
+  return 0;
+}
+
+/*! \brief Returns the virtual-to-physical mapping of one application segment on a core.
+ *
+ * \param[in]  segmentID  Index of the segment to query (see Kernel_GetAppSegmentCount)
+ * \param[in]  coreID     Physical processor ID whose mapping is queried
+ * \param[out] va         Segment's 32-bit virtual start address
+ * \param[out] pa         Segment's physical start address (64-bit container)
+ * \param[out] length     Segment length in bytes
+ * \return Syscall return code in r3 (0 on success)
+ */
+__INLINE__ int Kernel_GetAppSegmentMapping(uint32_t segmentID, uint32_t coreID, uint32_t* va, uint64_t* pa, uint32_t* length)
+{
+    int rc = 0;
+    _BGP_SprgShMem shm;
+    shm.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem);
+    /* NOTE(review): in a dynamic (non-static) map, caller-visible segment IDs
+       above 1 are shifted up by one before reaching the kernel -- presumably
+       because data and heap share one segment there (cf.
+       Kernel_GetAppSegmentCount); confirm against the kernel-side table. */
+    if((!shm.IsStaticMap)&&(segmentID > 1))
+        segmentID++;
+
+    /* Syscall ABI: syscall number in r0, arguments in r3..r7; the kernel's
+       return code comes back in r3 and is copied into rc. */
+    asm __volatile__ ("li 0,%1;"
+                      "mr 3,%2;"
+                      "mr 4,%3;"
+                      "mr 5,%4;"
+                      "mr 6,%5;"
+                      "mr 7,%6;"
+                      "sc;"
+                      "mr %0, 3;"
+                      : "=&r" (rc) /* early clobber */
+                      : "i" (_BGP_SYSCALL_NR_GETAPPSEGMENTMAPPING),
+                        "r" (segmentID),
+                        "r" (coreID),
+                        "r" (va),
+                        "r" (pa),
+                        "r" (length)
+                      : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+    return rc;
+}
+
+extern int KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized;
+extern uint32_t KERNEL_VIRTUAL2PHYSICAL_segcnt;
+
+#define KERNEL_V2P_MAXSEGMENTS 5
+extern uint32_t KERNEL_VIRTUAL2PHYSICAL_segva[KERNEL_V2P_MAXSEGMENTS];
+extern uint64_t KERNEL_VIRTUAL2PHYSICAL_segpa[KERNEL_V2P_MAXSEGMENTS];
+extern size_t KERNEL_VIRTUAL2PHYSICAL_segsz[KERNEL_V2P_MAXSEGMENTS];
+#undef KERNEL_V2P_MAXSEGMENTS
+
+/*! \brief Translate a 32bit Virtual Address to a 36bit Physical Address, returning separated upper and lower parts.
+ *
+ * \param[in] pVA 32bit virtual address in the calling process
+ * \param[in] vsize size in bytes of the virtual range
+ * \param[out] ua_out upper 4 physical address bits
+ * \param[out] pa_out lower 32 physical address bits
+ * \return Error condition for translation
+ * \retval 0 Successful translation, with ua_out and pa_out filled in
+ * \retval -1 Invalid Virtual Address for this process, ua_out and pa_out unmodified.
+ * \retval -2 The range from vaddr to (vaddr+vsize) is not physically contiguous.
+ * \retval -3 vaddr in Scratch, but no Scratch, or not enough Scratch, is enabled.
+ * \retval -4 invalid parameter
+ *
+ * \warning Supports only Text, Data, Stack, and (optional) eDRAM Scratch translation
+ * \warning CNK "pagesize" is 1MB.
+ * \warning Text and Data are virtually contiguous, but not necessarily physically contiguous.
+ * \todo Does not (currently) support > 4GB DDR space.
+ * \todo Does not (currently) support Shared Memory Area.
+ */
+__INLINE__ int Kernel_Virtual2Physical( void *pVA, /* input: 32bit Virtual start address */
+                                        size_t vsize, /* input: size in bytes of virtual range */
+                                        uint32_t *ua_out, /* output: upper 4 Physical Address bits */
+                                        uint32_t *pa_out ) /* output: lower 32 Physical Address bits */
+{
+    _BGP_SprgShMem shmem;
+
+    /* Parameter check is performed before ANY translation work so that the
+     * static-map path below also rejects bad arguments.  (Previously this
+     * check sat in the dynamic path only, so a NULL ua_out/pa_out or a zero
+     * vsize reached the static-map code unchecked.)
+     */
+    if ( !vsize || !ua_out || !pa_out )
+        return(-4);
+
+    shmem.shmem = _bgp_mfspr(_BGP_SPRGRO_SHMem);
+    if(shmem.IsStaticMap)
+    {
+        uint32_t x;
+
+        if(KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized == 0)
+        {
+            /* First use on this core: cache the segment table. */
+            Kernel_GetAppSegmentCount(&KERNEL_VIRTUAL2PHYSICAL_segcnt);
+            for(x=0; x<KERNEL_VIRTUAL2PHYSICAL_segcnt; x++)
+            {
+                if(Kernel_GetAppSegmentMapping(x, Kernel_PhysicalProcessorID(), &KERNEL_VIRTUAL2PHYSICAL_segva[x], &KERNEL_VIRTUAL2PHYSICAL_segpa[x], &KERNEL_VIRTUAL2PHYSICAL_segsz[x]))
+                    return -1;
+            }
+            KERNEL_VIRTUAL2PHYSICAL_static_v2p_initialized = 1;
+        }
+        for(x=0; x<KERNEL_VIRTUAL2PHYSICAL_segcnt; x++)
+        {
+            /* The range must start inside segment x, must fit entirely within
+             * it (">=" permits a range ending exactly at the segment end; the
+             * previous ">" test was off by one), and must not wrap the 32-bit
+             * address space.
+             */
+            if(((uint32_t)pVA >= KERNEL_VIRTUAL2PHYSICAL_segva[x]) && (KERNEL_VIRTUAL2PHYSICAL_segsz[x] >= (uint32_t)pVA - KERNEL_VIRTUAL2PHYSICAL_segva[x] + vsize) && ((uint32_t)pVA + vsize > (uint32_t)pVA))
+            {
+                *ua_out = (uint32_t)((KERNEL_VIRTUAL2PHYSICAL_segpa[x] + ((uint32_t)pVA-KERNEL_VIRTUAL2PHYSICAL_segva[x])) >> 32);
+                *pa_out = (uint32_t)((KERNEL_VIRTUAL2PHYSICAL_segpa[x] + ((uint32_t)pVA-KERNEL_VIRTUAL2PHYSICAL_segva[x]))&0xffffffff);
+                return 0;
+            }
+        }
+        return -1;
+    }
+
+    /* Dynamic map: decode the text/data/scratch layout from the mapping SPRGs. */
+    uint32_t vaddr = (uint32_t)pVA;
+    uint32_t texti = _bgp_mfspr( _BGP_SPRGRO_TextI );
+    uint32_t datai = _bgp_mfspr( _BGP_SPRGRO_DataI );
+    uint32_t dst2 = _bgp_mfspr( _BGP_SPRGRO_DST2 );
+    uint32_t shm = (_bgp_mfspr( _BGP_SPRGRO_SHMem ) & 0xFFFFFFC0);
+    uint32_t text_v_start = (texti & 0xFFF00000);
+    uint32_t data_v_start = (datai & 0xFFF00000); /* text_v_limit is (data_v_start - 1) */
+    uint32_t text_ua = ((texti & 0x000000C0) >> 6);
+    uint32_t text_p_start = ((texti & 0x000FFF00) << 12);
+    uint32_t data_ua = ((datai & 0x000000C0) >> 6);
+    uint32_t data_p_start = ((datai & 0x000FFF00) << 12);
+    uint32_t data_v_size = (dst2 & 0xFFF00000);
+    uint32_t data_v_limit = (data_v_start + data_v_size + _BGP_VMM_PAGE_MASK);
+    uint32_t vend = (vaddr + vsize - 1);
+    uint32_t vpage = (vaddr & ~_BGP_VMM_PAGE_MASK); /* which 1MB page? */
+    uint32_t voffset = (vaddr & _BGP_VMM_PAGE_MASK); /* offset within 1MB page */
+
+    /* range check: below text or off end of data, or in eDRAM Scratch */
+    if ( (vaddr < text_v_start) || (vend > data_v_limit) )
+    {
+        /* Scratch? */
+        if ( vaddr >= _BGP_VA_SCRATCH )
+        {
+            uint32_t scratchMB = ((dst2 & 0x00000078) << (20-3));
+            uint32_t scratch_end = (_BGP_VA_SCRATCH + scratchMB);
+
+            if ( !scratchMB || (vend > scratch_end) )
+                return(-3);
+
+            *ua_out = (uint32_t)_BGP_UA_SCRATCH;
+            *pa_out = (vaddr & _BGP_VM_SCRATCH);
+            return(0);
+        }
+        else if ( shm ) /* Shared Memory? If any, always mapped V=R. */
+        {
+            uint32_t shm_v_start = (shm & 0xFFF00000);
+            uint32_t shm_v_end = (shm_v_start + ((shm & 0x000FFF00) << 12));
+            uint32_t shm_ua = ((shm & 0x000000C0) >> 6);
+
+            if ( (vaddr >= shm_v_start) && (vend <= shm_v_end) )
+            {
+                *ua_out = shm_ua;
+                *pa_out = vaddr;
+                return(0);
+            }
+        }
+
+        return(-1);
+    }
+
+    /* Text? (includes Read-Only Data) */
+    if ( vaddr < data_v_start )
+    {
+        /* if range starts in Text but ends in Data, then discontiguous */
+        if ( vend >= data_v_start )
+            return(-2);
+
+        *ua_out = text_ua;
+        *pa_out = (text_p_start + (vpage - text_v_start) + voffset);
+
+        return(0);
+    }
+
+    /* Data */
+    *ua_out = data_ua;
+    *pa_out = (data_p_start + (vpage - data_v_start) + voffset);
+
+    return(0);
+}
+
+
+/*! \brief Returns a copy of the node's personality
+ *
+ * \param[out] personality Location of personality structure that will be filled in by Kernel_GetPersonality
+ * \param[in] size Size, in bytes, that was allocated to hold the personality structure
+ * \return Error indication
+ * \retval 0 Success
+ * \retval -1 Invalid parameters
+ */
+__INLINE__ int Kernel_GetPersonality(_BGP_Personality_t* personality, size_t size)
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+  /* Syscall ABI: number in r0, personality buffer in r3, buffer size in r4;
+     the kernel's return code comes back in r3. */
+  asm __volatile__ ("li 0,%3;"
+                    "mr 3,%1;"
+                    "mr 4,%2;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc) /* early clobber */
+                    : "r" (personality),
+                      "r" (size),
+                      "i" (_BGP_SYSCALL_NR_GET_PERSONALITY)
+                    : "r0", "r3", "r4", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Starts to checkpoint/restore the Kernel data structures for CNK
+ *
+ * \param[in]  component  Kernel component to checkpoint or restore
+ * \param[in]  operation  Operation the kernel should perform
+ *                        (e.g. CHECKPOINT_START, CHECKPOINT_RESTART, CHECKPOINT_COMPLETE)
+ * \param[in]  buffer     Caller-supplied buffer for checkpoint data
+ * \param[in]  size       Size of buffer, in bytes
+ * \param[out] actualSize NOTE(review): presumably the number of bytes used or
+ *                        required -- confirm against the kernel implementation
+ * \param[out] basePtr    NOTE(review): semantics not evident from this header -- confirm
+ * \return Error indication
+ * \retval 0 Success
+ * \retval -1 Invalid parameters
+ */
+__INLINE__ int Kernel_checkpoint(int component, int operation, void *buffer, uint32_t size, uint32_t *actualSize, uint32_t*basePtr)
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+  /* Syscall ABI: number in r0, arguments in r3..r8; return code back in r3. */
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "mr 8,%7;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc) /* early clobber */
+                    : "i" (_BGP_SYSCALL_NR_CHECKPOINT),
+                      "r" (component),
+                      "r" (operation),
+                      "r" (buffer),
+                      "r" (size),
+                      "r" (actualSize),
+                      "r" (basePtr)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Returns the contents of the running PPC450's processor version register.
+ * \return Contents of PPC450 PVR register
+ *
+ * NOTE(review): this wrapper loads _BGP_SYSCALL_NR_GET_PERSONALITY into r0 --
+ * the same syscall number used by Kernel_GetPersonality() above, but with no
+ * arguments set up. This looks like a copy/paste slip; a PVR-specific syscall
+ * number was probably intended. Confirm against the kernel's syscall table.
+ */
+__INLINE__ int Kernel_GetProcessorVersion()
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+  asm __volatile__ ("li 0,%1;"
+                    "sc;"
+                    "mr %0, 3;"
+
+                    : "=&r" (rc) /* early clobber */
+                    : "i" (_BGP_SYSCALL_NR_GET_PERSONALITY)
+                    : "r0", "r3", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief LockBox allocate syscall definition
+ * \param[in] lockid Indicates which counter ID is to be obtained. Counter IDs vary from 0-1023
+ * \param[in] numlocks The number of sequential counter IDs that will be obtained
+ * \param[out] ptr An array of pointers that will be filled in with the counter virtual addresses.
+ * \param[in] flags Optional flags
+ * \warning Storage indicated by ptr must be large enough to hold numlocks*sizeof(uint32_t) bytes
+ * \internal This is an internal syscall - do not use.
+ * \see LockBox_AllocateCounter
+ * \see LockBox_AllocateMutex
+ * \see LockBox_AllocateBarrier
+ */
+/* __INLINE__ int Kernel_AllocateLockBox(uint32_t lockid, uint32_t numlocks, uint32_t** ptr, uint32_t flags) */
+/* { */
+/* int rc = 0; // this syscall returns RC in r3 and does not use errno */
+/* */
+/* asm __volatile__ ("li 0,%1;" */
+/* "mr 3,%2;" */
+/* "mr 4,%3;" */
+/* "mr 5,%4;" */
+/* "mr 6,%5;" */
+/* / "sc;" */
+/* "mr %0, 3;" */
+/* : "=&r" (rc) // early clobber */
+/* : "i" (_BGP_SYSCALL_NR_ALLOC_COUNTER), */
+/* "r" (lockid), */
+/* "r" (numlocks), */
+/* "r" (ptr), */
+/* "r" (flags) */
+/* / : "r0", "r3", "r4", "r5", "r6", "cc", "memory" ); */
+/* */
+/* return( rc ); */
+/* } */
+
+/*! \brief Converts a Rank into a XYZT Coordinate
+ *
+ * \param[in] rank Rank for the node
+ * \param[out] xcoord X Coordinate for the specified node
+ * \param[out] ycoord Y Coordinate for the specified node
+ * \param[out] zcoord Z Coordinate for the specified node
+ * \param[out] tcoord T Coordinate for the specified node
+ * \return Error status
+ * \retval 0 Success
+ * \retval non-zero Error
+ */
+__INLINE__ int Kernel_Rank2Coord(uint32_t rank, uint32_t* xcoord, uint32_t* ycoord, uint32_t* zcoord, uint32_t* tcoord)
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+  /* Syscall ABI: number in r0, rank in r3, output coordinate pointers in
+     r4..r7; the kernel's return code comes back in r3. */
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc) /* early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RANK2COORD),
+                      "r" (rank),
+                      "r" (xcoord),
+                      "r" (ycoord),
+                      "r" (zcoord),
+                      "r" (tcoord)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Converts all ranks into a XYZT Coordinate
+ *
+ * \param[out] coordinates XYZT coordinates of all nodes. The array is in
+ * rank order. If a rank is not mapped, its coordinates will be
+ * (255,255,255,255).
+ * \param[in] len specifies the length of the coordinates array. That is,
+ * the caller is guaranteeing that there is storage for coordinates[0],
+ * coordinates[1], ..., coordinates[len-1].
+ * \return Error status
+ * \retval 0 Success
+ * \retval non-zero Error
+ */
+
+/* One XYZT coordinate entry as filled in by Kernel_Ranks2Coords; each
+   component is (255) when the corresponding rank is not mapped. */
+typedef struct _Kernel_Coordinates {
+    unsigned char x;  /* X coordinate */
+    unsigned char y;  /* Y coordinate */
+    unsigned char z;  /* Z coordinate */
+    unsigned char t;  /* T coordinate */
+} kernel_coords_t;
+
+__INLINE__ int Kernel_Ranks2Coords(kernel_coords_t* coordinates, uint32_t len)
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+  /* Syscall ABI: number in r0, array pointer in r3, element count in r4;
+   * the kernel's return code comes back in r3.
+   * Fix: "r4" added to the clobber list -- the "mr 4,%3" instruction writes
+   * r4, but the original listed only r0/r3, letting the compiler assume r4
+   * survived the asm. */
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc) /* early clobber */
+                    : "i" (_BGP_SYSCALL_NR_RANKS2COORDS),
+                      "r" (coordinates),
+                      "r" (len)
+                    : "r0", "r3", "r4", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Convert a XYZT Coordinate into a Rank. Also returns number of nodes
+ * \param[in] xcoord X Coordinate used to specify the desired node
+ * \param[in] ycoord Y Coordinate used to specify the desired node
+ * \param[in] zcoord Z Coordinate used to specify the desired node
+ * \param[in] tcoord T Coordinate used to specify the desired node
+ * \param[out] rank Rank of the desired node
+ * \param[out] numnodes Number of Nodes in the partition
+ * \return Error indication
+ * \retval 0 Success
+ * \retval non-zero Error
+ */
+
+__INLINE__ int Kernel_Coord2Rank(uint32_t xcoord, uint32_t ycoord, uint32_t zcoord, uint32_t tcoord, uint32_t* rank, uint32_t* numnodes)
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+  /* Syscall ABI: number in r0, X/Y/Z/T coordinates in r3..r6, output
+     pointers in r7..r8; the kernel's return code comes back in r3. */
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "mr 5,%4;"
+                    "mr 6,%5;"
+                    "mr 7,%6;"
+                    "mr 8,%7;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc) /* early clobber */
+                    : "i" (_BGP_SYSCALL_NR_COORD2RANK),
+                      "r" (xcoord),
+                      "r" (ycoord),
+                      "r" (zcoord),
+                      "r" (tcoord),
+                      "r" (rank),
+                      "r" (numnodes)
+                    : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Returns the Job ID
+ * \return Contains the control system JobID
+ */
+__INLINE__ uint32_t Kernel_GetJobID()
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+  /* Syscall ABI: number in r0, no arguments; the job ID comes back in r3.
+     (rc is an int but the function returns uint32_t; the raw r3 value is
+     passed through unchanged.) */
+  asm __volatile__ ("li 0,%1;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc) /* early clobber */
+                    : "i" (_BGP_SYSCALL_NR_GETJOBID)
+                    : "r0", "r3", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Read from a privileged DCR
+ * \param[in] dcrid Number of the DCR register
+ * \param[out] value Contents of DCR register
+ * \return Error indication
+ * \retval 0 Success
+ * \retval -1 Invalid DCR
+ * \note Only selected privileged DCRs will be accessible via this system call.
+ */
+__INLINE__ uint32_t Kernel_ReadDCR(uint32_t dcrid, uint32_t* value)
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+  /* Syscall ABI: number in r0, DCR number in r3, result pointer in r4;
+     the kernel's return code comes back in r3. */
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc) /* early clobber */
+                    : "i" (_BGP_SYSCALL_NR_READDCR),
+                      "r" (dcrid),
+                      "r" (value)
+                    : "r0", "r3", "r4", "cc", "memory" );
+
+  return( rc );
+}
+
+/*! \brief Write to a privileged DCR
+ * \param[in] dcrid Number of the DCR register
+ * \param[in] value Contents of DCR register
+ * \return Error indication
+ * \retval 0 Success
+ * \retval -1 Invalid DCR
+ * \note Only selected privileged DCRs will be accessible via this system call.
+ */
+__INLINE__ uint32_t Kernel_WriteDCR(uint32_t dcrid, uint32_t value)
+{
+  int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+  /* Syscall ABI: number in r0, DCR number in r3, value to write in r4;
+     the kernel's return code comes back in r3. */
+  asm __volatile__ ("li 0,%1;"
+                    "mr 3,%2;"
+                    "mr 4,%3;"
+                    "sc;"
+                    "mr %0, 3;"
+                    : "=&r" (rc) /* early clobber */
+                    : "i" (_BGP_SYSCALL_NR_WRITEDCR),
+                      "r" (dcrid),
+                      "r" (value)
+                    : "r0", "r3", "r4", "cc", "memory" );
+
+  return( rc );
+}
+
+/*!
+ * \brief Query Free DMA Counter Subgroups within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available) subgroups within the specified group.
+ *
+ * \param[in] type Specifies whether this is an injection or
+ * reception counter group (DMA_Type_Injection
+ * or DMA_Type_Reception)
+ * \param[in] group Group number being queried (0 to
+ * DMA_NUM_COUNTER_GROUPS-1)
+ * \param[out] num_subgroups Pointer to an int where the number of free
+ * subgroups in the specified group is returned
+ * \param[out] subgroups Pointer to an array of num_subgroups ints where
+ * the list of num_subgroups subgroups is returned.
+ * Each int is the subgroup number
+ * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). The
+ * caller must provide space for
+ * DMA_NUM_COUNTERS_PER_SUBGROUP ints, in case the
+ * entire counter group is free.
+ *
+ * \retval 0 Successful. num_subgroups and subgroups array set as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupQueryFree()
+ * \note The kernel may need to synchronize with other cores performing
+ * allocate or free syscalls.
+ *
+ */
+__INLINE__ uint32_t Kernel_CounterGroupQueryFree(uint32_t type, uint32_t group, uint32_t* num_subgroups, uint32_t* subgroups)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r6 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_COUNTERGRPQUERYFREE),
+ "r" (type),
+ "r" (group),
+ "r" (num_subgroups),
+ "r" (subgroups)
+ : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+
+ return( rc );
+}
+
+/*!
+ * \brief Allocate DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that allocates DMA counters
+ * from the specified group. Counters may be allocated in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters. Parameters specify how interrupts,
+ * generated when a counter hits zero, are to be handled. A
+ * DMA_CounterGroup_t structure is returned for use in other inline
+ * functions to operate on the allocated counters.
+ *
+ * \param[in] type Specifies whether this is an injection or
+ * reception counter group (DMA_Type_Injection
+ * or DMA_Type_Reception)
+ * \param[in] group Group number whose counters are being allocated
+ * (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in] num_subgroups Number of subgroups to be allocated from the group
+ * (1 to DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in] subgroups Pointer to an array of num_subgroups ints where
+ * the list of subgroups to be allocated is provided.
+ * Each int is the subgroup number
+ * (0 to num_subgroups-1).
+ * \param[in] target The core that will receive the interrupt when a
+ * counter in this allocation hits zero
+ * (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in] handler A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * counter in this allocation hits zero. This
+ * function must be coded to take 4 uint32_t
+ * parameters:
+ * - A pointer to storage specific to this
+ * handler. This is the handler_parm
+ * specified on this allocation function.
+ * - Three uint32_t parameters that are not used.
+ * If handler is NULL, hit-zero interrupts will not
+ * be enabled for these counters.
+ * \param[in] handler_parm A pointer to storage that should be passed to the
+ * interrupt handling function (see handler
+ * parameter)
+ * \param[in] interruptGroup A InterruptGroup_t that identifies the
+ * group of interrupts that the counters being
+ * allocated will become part of.
+ * \param[out] cg_ptr Pointer to a structure that is filled in upon
+ * successful return for use in other inline
+ * functions to operate on the allocated counters.
+ * \li counter - Array of software counter
+ * structures. Each element
+ * points to the corresponding
+ * hardware counter in DMA SRAM.
+ * Pointers are null if not
+ * allocated).
+ * Counters are initialized to
+ * DMA_COUNTER_INIT_VAL,
+ * disabled, their hit_zero bit
+ * is off, base and max are NULL.
+ * \li status_ptr - Points to status area within the
+ * DMA memory map.
+ * \li permissions - Bits set for each allocated
+ * counter
+ * \li grp_permissions - Permissions for each
+ * subgroup
+ * \li group_id - The group number
+ * \li type - The type of DMA (injection or
+ * reception)
+ *
+ * \retval 0 Successful. Counters allocated and cg_ptr structure filled in as
+ * described.
+ * \retval -1 Unsuccessful. errno gives the reason. Nothing has been
+ * allocated.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupAllocate()
+ * \note The kernel may need to synchronize with other cores performing queries
+ * or frees.
+ *
+ */
+__INLINE__ uint32_t Kernel_CounterGroupAllocate(uint32_t type, uint32_t group, uint32_t num_subgroups, uint32_t* subgroups, uint32_t target, uint32_t handler, uint32_t* handler_parm, uint32_t interruptGroup, uint32_t* cg_ptr)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r11 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "mr 7,%6;"
+ "mr 8,%7;"
+ "mr 9,%8;"
+ "mr 10,%9;"
+ "mr 11,%10;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_COUNTERGRPALLOCATE),
+ "r" (type),
+ "r" (group),
+ "r" (num_subgroups),
+ "r" (subgroups),
+ "r" (target),
+ "r" (handler),
+ "r" (handler_parm),
+ "r" (interruptGroup),
+ "r" (cg_ptr)
+ : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "cc", "memory" );
+
+ return( rc );
+}
+
+/*!
+ * \brief Free DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA counters
+ * from the specified group. Counters may be freed in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters.
+ *
+ * \param[in] group Group number whose counters are being freed
+ * (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in] num_subgroups Number of subgroups to be freed from the group
+ * (1-DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in] subgroups Pointer to an array of num_subgroups ints where
+ * the list of subgroups to be freed is provided.
+ * Each int is the subgroup number
+ * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1).
+ * \param[out] cg_ptr Pointer to the structure previously filled in when
+ * these counters were allocated. Upon successful
+ * return, this structure is updated to reflect the
+ * freed counters:
+ * \li counter[] - Counter structures Pointers to
+ * freed counters nulled.
+ * \li permissions - Bits cleared for each freed
+ * counter.
+ *
+ * \retval 0 Successful. Counters freed and cg_ptr structure updated as
+ * described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupFree()
+ * \note The kernel may need to synchronize with other cores performing allocates
+ * or queries.
+ */
+__INLINE__ uint32_t Kernel_CounterGroupFree(uint32_t group, uint32_t num_subgroups, uint32_t* subgroups, uint32_t* cg_ptr)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r6 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_COUNTERGRPFREE),
+ "r" (group),
+ "r" (num_subgroups),
+ "r" (subgroups),
+ "r" (cg_ptr)
+ : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+
+ return( rc );
+}
+
+
+/*!
+ * \brief Query Free DMA InjFifos within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available to be allocated) fifos within the specified group.
+ *
+ * \param[in] group Group number being queried
+ * (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[out] num_fifos Pointer to an int where the number of free
+ * fifos in the specified group is returned
+ * \param[out] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of free fifos is returned.
+ * Each int is the fifo number
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * The caller must provide space for
+ * DMA_NUM_INJ_FIFOS_PER_GROUP ints,
+ * in case the entire fifo group is free.
+ *
+ * \retval 0 Successful. num_fifos and fifo_ids array set as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupQueryFree()
+ */
+__INLINE__ uint32_t Kernel_InjFifoGroupQueryFree(uint32_t group, uint32_t* num_fifos, uint32_t* fifo_ids)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r5 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_INJFIFOGRPQUERYFREE),
+ "r" (group),
+ "r" (num_fifos),
+ "r" (fifo_ids)
+ : "r0", "r3", "r4", "r5", "cc", "memory" );
+
+ return( rc );
+}
+
+/*!
+ * \brief Allocate DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that allocates specified
+ * DMA injection fifos from the specified group. Parameters specify whether
+ * each fifo is high or normal priority, local or non-local, and which torus
+ * fifos it maps to. A DMA_InjFifoGroup_t structure is returned for
+ * use in other inline functions to operate on the allocated fifos.
+ *
+ * Refer to the interrupt discussion at the top of this include file to see why
+ * there are no interrupt-related parameters.
+ *
+ * \param[in] group Group number whose DMA injection fifos are being
+ * allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in] num_fifos Number of fifos to be allocated from the group
+ * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of fifos to be allocated is provided.
+ * Each int is the fifo number (0 to num_fifos-1).
+ * \param[in] priorities Pointer to an array of num_fifos short ints where
+ * the list of priorities to be assigned to the fifos
+ * is provided. Each short int indicates the priority
+ * to be assigned to each of the fifos identified in
+ * the fifo_ids array (0 is normal, 1 is high priority).
+ * \param[in] locals Pointer to an array of num_fifos short ints where
+ * an indication is provided of whether each fifo will
+ * be used for local transfers (within the same node)
+ * or torus transfers. Each short int indicates the
+ * local/non-local attribute to be assigned to each of
+ * the fifos identified in the fifo_ids array (0 is
+ * non-local, 1 is local). If 0, the corresponding
+ * array element in ts_inj_maps indicates which torus
+ * fifos can be injected.
+ * \param[in] ts_inj_maps Pointer to an array of num_fifos short ints where
+ * the torus fifos that can be injected are specified
+ * for each fifo. Each short int specifies which of
+ * the 8 torus injection fifos can be injected when a
+ * descriptor is injected into the DMA injection fifo.
+ * Must be non-zero when the corresponding "locals"
+ * is 0.
+ * \param[out] fg_ptr Pointer to a structure that is filled in upon
+ * successful return for use in other inline functions
+ * to operate on the allocated fifos.
+ * \li fifos - Array of fifo structures. Structures
+ * for allocated fifos are initialized as
+ * documented below. Structures for
+ * fifos not allocated by this instance of
+ * this syscall are initialized to binary
+ * zeros. Allocated fifos are enabled.
+ * \li status_ptr - Points to status area within the
+ * DMA memory map.
+ * \li permissions - Bits indicating which fifos were
+ * allocated during this syscall.
+ * \li group_id - The id of this group.
+ *
+ * \retval 0 Successful. Fifos allocated and fg_ptr structure filled in as
+ * described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupAllocate()
+ * \return The group fifo structure pointed to by fg_ptr is completely
+ * initialized as follows:
+ * - status_ptr points to the appropriate fifo group DMA memory map
+ * - fifo structures array. Fifo structures for fifos not allocated
+ * during this syscall are initialized to binary zeros. Fifo
+ * structures for fifos allocated during this syscall are initialized:
+ * - fifo_hw_ptr points to the DMA memory map for this fifo. The
+ * hardware start, end, head, and tail are set to zero by the
+ * kernel.
+ * - All other fields in the structure are set to zero by the kernel
+ * except priority, local, and ts_inj_map are set to reflect what
+ * was requested in the priorities, locals, and ts_inj_maps
+ * syscall parameters.
+ *
+ */
+__INLINE__ uint32_t Kernel_InjFifoGroupAllocate(uint32_t group, uint32_t num_fifos, uint32_t* fifo_ids, uint16_t* priorities, uint16_t* locals, uint8_t* ts_inj_maps, uint32_t* fg_ptr)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r9 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "mr 7,%6;"
+ "mr 8,%7;"
+ "mr 9,%8;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_INJFIFOGRPALLOCATE),
+ "r" (group),
+ "r" (num_fifos),
+ "r" (fifo_ids),
+ "r" (priorities),
+ "r" (locals),
+ "r" (ts_inj_maps),
+ "r" (fg_ptr)
+ : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory" );
+
+ return( rc );
+}
+
+/*!
+ * \brief Free DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA injection
+ * fifos from the specified group.
+ *
+ * \param[in] group Group number whose DMA injection fifos are being
+ * freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in] num_fifos Number of fifos to be freed from the group
+ * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of fifos to be freed is provided.
+ * Each int is the fifo number (0 to num_fifos-1).
+ * \param[in] fg_ptr Pointer to the structure previously filled in when
+ * these fifos were allocated. Upon successful
+ * return, this structure is updated to reflect the
+ * freed fifos:
+ * \li fifos - Structures for freed fifos zero'd.
+ * Freed fifos are disabled.
+ * \li permissions - Bits cleared for each freed fifo.
+ *
+ * \retval 0 Successful. Fifos freed and fg_ptr structure updated as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupFree()
+ * \note This is a fatal error if any of the fifos are non empty and activated
+ *
+ */
+__INLINE__ uint32_t Kernel_InjFifoGroupFree(uint32_t group, uint32_t num_fifos, uint32_t* fifo_ids, uint32_t* fg_ptr)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r6 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_INJFIFOGRPFREE),
+ "r" (group),
+ "r" (num_fifos),
+ "r" (fifo_ids),
+ "r" (fg_ptr)
+ : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+
+ return( rc );
+}
+
+/*!
+ * \brief DMA InjFifo Initialization By Id
+ *
+ * - For an allocated injection DMA fifo, initialize its start, head, tail, and
+ * end.
+ * - Compute fifo size and free space.
+ * - Initialize wrap count.
+ * - Activate the fifo.
+ *
+ * \param[in] fg_ptr Pointer to fifo group structure.
+ * \param[in] fifo_id Id of the fifo to be initialized
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] va_start Virtual address of the start of the fifo.
+ * \param[in] va_head Virtual address of the head of the fifo (typically
+ * equal to va_start).
+ * \param[in] va_end Virtual address of the end of the fifo.
+ *
+ * \retval 0 Successful.
+ * \retval -1 Unsuccessful. Error checks include
+ * - va_start < va_end
+ * - va_start <= va_head <=
+ * (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ * - va_start and va_end are 32-byte aligned
+ * - fifo_size is larger than (DMA_MIN_INJECT_SIZE_IN_QUADS +
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ *
+ */
+__INLINE__ uint32_t Kernel_InjFifoInitById(uint32_t* fg_ptr,
+ int fifo_id,
+ uint32_t* va_start,
+ uint32_t* va_head,
+ uint32_t* va_end)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r7 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "mr 7,%6;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_INJFIFOINITID),
+ "r" (fg_ptr),
+ "r" (fifo_id),
+ "r" (va_start),
+ "r" (va_head),
+ "r" (va_end)
+ : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+
+ return( rc );
+}
+
+
+/*!
+ * \brief Set DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that
+ * - Sets DCRs establishing the map between the hardware torus fifos and the
+ * DMA reception fifos that are to receive the packets from those hardware
+ * torus fifos.
+ * - Sets DCRs establishing the DMA reception fifos that are to receive
+ * local transfer packets.
+ * - Sets the DCRs establishing the type (0 or 1) of each reception fifo.
+ * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos.
+ * - Leaves all of the fifos that are used in a "disabled" state.
+ * DMA_RecFifoInitById() initializes and enables the fifos.
+ *
+ * \param[in] rec_map Reception Fifo Map structure, defining the mapping.
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ * \internal This is an internal syscall
+ * \see DMA_RecFifoSetMap
+ * \note This function should be called once per job, after DMA_ResetRelease().
+ * It may be called by any core, but once a core has called it, other
+ * calls by that same core or any other core will fail.
+ *
+ * \note During job init, the kernel sets up the DCR clear masks for each
+ * reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear
+ * a fifo in group g only clears group g.
+ *
+ */
+__INLINE__ int Kernel_RecFifoSetMap(uint32_t* rec_map)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, arg in r3 */
+ "mr 3,%2;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_RECFIFOSETMAP),
+ "r" (rec_map)
+ : "r0", "r3", "cc", "memory" );
+ return( rc );
+}
+
+/*!
+ * \brief Get DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that returns a DMA
+ * reception fifo map structure, filled in according to the DCRs.
+ *
+ * \param[in,out] rec_map A pointer to a Reception Fifo Map structure
+ * that will be filled-in upon return.
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+__INLINE__ int Kernel_RecFifoGetMap(uint32_t* rec_map)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, arg in r3 */
+ "mr 3,%2;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_RECFIFOGETMAP),
+ "r" (rec_map)
+ : "r0", "r3", "cc", "memory" );
+ return( rc );
+}
+
+/*!
+ * \brief Get DMA Reception Fifo Group
+ *
+ * This is a wrapper around a System Call. This function returns THE
+ * one-and-only pointer to the fifo group structure, with the entries all
+ * filled in from info in the DCRs. If called multiple times with the same
+ * group, it will always return the same pointer, and the system call will
+ * not be invoked again.
+ *
+ * It must be called AFTER DMA_RecFifoSetMap().
+ *
+ * By convention, the same "target" is used for normal and header fifo
+ * interrupts (could be changed). In addition, by convention, interrupts for
+ * fifos in group g come out of the DMA as non-fatal irq bit 28+g,
+ * ie, only fifos in group g can cause the "type g" threshold interrupts.
+ *
+ * \param[in] grp The group number (0 through DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] target The core that will receive the interrupt when a
+ * fifo in this group reaches its threshold
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * Ignored on subsequent call with the same group.
+ * \param[in] normal_handler A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * normal fifo in this group reaches its threshold.
+ * This function must be coded to take 4 uint32_t
+ * parameters:
+ * - A pointer to storage specific to this
+ * handler. This is the normal_handler_parm
+ * specified on this function call.
+ * - 3 uint32_t parameters that are not used.
+ * If normal_handler is NULL, threshold interrupts
+ * are not delivered for normal fifos in this group.
+ * Ignored on subsequent call with the same group.
+ * \param[in] normal_handler_parm A pointer to storage that should be passed
+ * to the normal interrupt handling function
+ * (see normal_handler parameter).
+ * Ignored on subsequent call with the same
+ * group.
+ * \param[in] header_handler ** This parameter is deprecated. Specify NULL.**
+ * A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * header fifo in this group reaches its threshold.
+ * This function must be coded to take 2 parameters:
+ * void* A pointer to storage specific to this
+ * handler. This is the header_handler_parm
+ * specified on this function call.
+ * int The global fifo ID of the fifo that hit
+ * its threshold (0 through
+ * NUM_DMA_REC_FIFOS-1).
+ * If header_handler is NULL, threshold interrupts
+ * are not delivered for header fifos in this group.
+ * Ignored on subsequent call with the same group.
+ * \param[in] header_handler_parm ** This parameter is deprecated. Specify
+ * NULL. **
+ * A pointer to storage that should be passed
+ * to the header interrupt handling function
+ * (see header_handler parameter).
+ * Ignored on subsequent call with the same
+ * group.
+ * \param[in] interruptGroup A InterruptGroup_t that identifies the
+ * group of interrupts that the fifos in this group
+ * will become part of.
+ * Ignored on subsequent call with the same group.
+ *
+ * \return RecFifoGroupStruct Pointer to a DMA Reception Fifo Group structure
+ * that reflects the fifos that are being used in
+ * this group. This same structure is shared by
+ * all users of this reception fifo group.
+ * NULL is returned if an error occurs.
+ *
+ * \note The following comments from Phil about the internals of the syscall:
+ * - error checks
+ * - 0 <= group_id < 4
+ * - the start of the fifo group is a valid virtual address (tlb mapped)?
+ * - disable the rDMA
+ * - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information
+ * - loop through the map to determine how many and which fifos in this group
+ * are used, including headers
+ * - filling in the addresses of used fifos
+ * - In particular, any pointer to any fifo in the group that is not used
+ * will have a null pointer
+ * - furthermore,
+ * - write starting values to all used fifos
+ * - make sure all interrupts are cleared
+ * - enable rDMA
+ *
+ */
+__INLINE__ int Kernel_RecFifoGetFifoGroup(
+ uint32_t* fifogroup,
+ int grp,
+ int target,
+ Kernel_CommThreadHandler normal_handler,
+ void *normal_handler_parm,
+ Kernel_CommThreadHandler header_handler,
+ void *header_handler_parm,
+ Kernel_InterruptGroup_t interruptGroup
+ )
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r10 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "mr 7,%6;"
+ "mr 8,%7;"
+ "mr 9,%8;"
+ "mr 10,%9;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_RECGETFIFOGROUP),
+ "r" (fifogroup),
+ "r" (grp),
+ "r" (target),
+ "r" (normal_handler),
+ "r" (normal_handler_parm),
+ "r" (header_handler),
+ "r" (header_handler_parm),
+ "r" (interruptGroup)
+ : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
+
+ return( rc );
+}
+
+/*!
+ * \brief DMA RecFifo Initialization By Id
+ *
+ * - For a DMA reception fifo, initialize its start, head, tail, and end.
+ * - Compute fifo size and free space.
+ *
+ * \param[in] fg_ptr Pointer to fifo group structure.
+ * \param[in] fifo_id Id of the fifo to be initialized
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in] va_start Virtual address of the start of the fifo.
+ * \param[in] va_head Virtual address of the head of the fifo (typically
+ * equal to va_start).
+ * \param[in] va_end Virtual address of the end of the fifo.
+ *
+ * \retval 0 Successful.
+ * \retval -1 Unsuccessful. Error checks include
+ * - va_start < va_end
+ * - va_start <= va_head < va_end
+ * - va_start and va_end are 32-byte aligned
+ * - fifo_size >= DMA_MIN_REC_FIFO_SIZE_IN_BYTES
+ *
+ */
+__INLINE__ int Kernel_RecFifoInitById(
+ uint32_t* fg_ptr,
+ int fifo_id,
+ void *va_start,
+ void *va_head,
+ void *va_end
+ )
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r7 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "mr 7,%6;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_RECFIFOINITID),
+ "r" (fg_ptr),
+ "r" (fifo_id),
+ "r" (va_start),
+ "r" (va_head),
+ "r" (va_end)
+ : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+
+ return( rc );
+}
+
+ /*!
+ * \brief Injects a binary (RAW) RAS message to the control system
+ *
+ * Ships a RAS message of the given facility, unit, errcode, and packed data to the control system. No checking is done on the
+ * correctness of the data. Can be used to simulate a RAS message for testing purposes.
+ *
+ * \param[in] facility High level component detecting the condition. (e.g., _bgp_fac_kernel, _bgp_fac_application, _bgp_fac_diags)
+ * \param[in] unit Unit generating the RAS event. (e.g., _bgp_unit_ppc450, _bgp_unit_snoop)
+ * \param[in] err_code Error code for RAS event (e.g., _bgp_err_ppc450_l1d_dpe0)
+ * \param[in] numwords Number of 32-bit integers in the packed binary array
+ * \param[in] array Pointer to the array of packed binary data.
+ *
+ * Restriction. There is currently a limit of eight 32-bit words of packed binary data.
+ *
+ * \internal This function is intended for testing purposes only. It should not be used in a production system as it could introduce false RAS messages.
+ */
+
+__INLINE__ int Kernel_InjectRAWRAS(
+ _BGP_Facility facility,
+ _BGP_RAS_Units unit,
+ uint16_t err_code,
+ int numwords,
+ const uint32_t* array)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r7 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "mr 7,%6;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_RAWRASINJECT),
+ "r" (facility),
+ "r" (unit),
+ "r" (err_code),
+ "r" (numwords),
+ "r" (array)
+ : "r0", "r3", "r4", "r5", "r6", "r7", "cc", "memory" );
+
+ return( rc );
+}
+
+ /*!
+ * \brief Injects a ASCII (Textual) RAS message to the control system
+ *
+ * Ships a RAS message of the given facility, unit, errcode, and an ASCII string to the control system. No checking is done on the
+ * correctness of the facility or unit. Can be used to simulate a RAS message for testing purposes.
+ *
+ * \param[in] facility High level component detecting the condition. (e.g., _bgp_fac_kernel, _bgp_fac_application, _bgp_fac_diags)
+ * \param[in] unit Unit generating the RAS event. (e.g., _bgp_unit_ppc450, _bgp_unit_snoop)
+ * \param[in] err_code Error code for RAS event (e.g., _bgp_err_ppc450_l1d_dpe0)
+ * \param[in] text Pointer to a string of null-terminated ASCII characters
+ *
+ * \internal This function is intended for testing purposes only. It should not be used in a production system as it could introduce false RAS messages.
+ */
+__INLINE__ int Kernel_InjectASCIIRAS(
+ _BGP_Facility facility,
+ _BGP_RAS_Units unit,
+ uint16_t err_code,
+ const uint8_t* text)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, args in r3..r6 */
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "mr 5,%4;"
+ "mr 6,%5;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_ASCIIRASINJECT),
+ "r" (facility),
+ "r" (unit),
+ "r" (err_code),
+ "r" (text)
+ : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
+
+ return( rc );
+}
+
+
+
+/*!
+ * \brief Enables/Disables the counter overflow/underflow interrupts
+ *
+ * This function is a wrapper around a system call that can enable or disable the 4 counter overflow/underflow interrupts
+ *
+ * \param[in] enable Boolean: non-zero enables, zero disables the interrupts
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+__INLINE__ int Kernel_ChgCounterInterruptEnables(uint32_t enable)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0, arg in r3 */
+ "mr 3,%2;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_CHGDMACTRINTERRUPT),
+ "r" (enable)
+ : "r0", "r3", "cc", "memory" );
+ return( rc );
+}
+
+
+/*!
+ * \brief Clears the Full Reception FIFO (DD1 workaround)
+ *
+ * This function exists to reset the DMA reception fifos - it is a workaround for DD1 only. It should not be needed in DD2.
+ *
+ * NOTE: the implementation has been removed; this stub always reports success.
+ */
+__INLINE__ int Kernel_ClearFullReceptionFifo(void)
+{
+ return 0;
+}
+
+#include <spi/lockbox_interface.h>
+
+#if ((!defined(__CNK__)) && (!defined(__BL__)))
+/*! \brief Creates a pthread with a commthread attribute
+ *
+ * \note CNK restriction: 1 CommThread per core is allowed
+ * \note In Dual or VNM, each process must allocate its own commthreads
+ * \note CommThreads are pinned per core. (e.g., in SMP mode, this SPI must be called 4 times to create enough CommThreads for each processor)
+ * \warning non-portable pthread API
+ * \param[in] thread pthread_t structure
+ * \param[in] attr pthread_attr_t structure
+ * \param[in] start_routine function pointer of the thread's main()
+ * \param[in] arg 1st argument to the pthread
+ * \return Error condition from pthread_create
+ * \retval 0 success
+ * \retval -1 error, check errno
+ */
+__INLINE__ int pthread_create_CommThread_np( pthread_t *thread,
+ pthread_attr_t *attr,
+ void *(*start_routine)(void *),
+ void *arg )
+{
+ uint32_t usprg0 = _bgp_mfspr( SPRN_USPRG0 ); /* save orig usprg0 */
+
+ _bgp_mtspr( SPRN_USPRG0, _BGP_COMMTHREAD_MAGIC ); /* tag USPRG0 so the kernel marks the new thread as a CommThread */
+
+ int rc = pthread_create( thread, attr, start_routine, arg );
+ _bgp_mtspr( SPRN_USPRG0, usprg0 ); /* restore orig usprg0 */
+
+ return( rc );
+}
+#endif
+
+/*! \brief Causes a commthread to disappear from the runqueue
+ *
+ * \note Kernel does not guarantee that the instruction pointer, stack pointer, and register state are preserved across a poof.
+ * \note TLS data is preserved across a poof
+ * \note This SPI is only executable on a comm. thread.
+ * \warning non-portable pthread API
+ * \return error indication
+ * \retval success Does not return. Thread has "poofed"
+ * \retval -1 Calling thread is not a CommThread, so cannot poof
+ */
+__INLINE__ int pthread_poof_np( void )
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;" /* syscall number in r0; no arguments, result in r3 */
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_PTHREAD_POOF)
+ : "r0", "r3", "cc", "memory" );
+
+ return( rc );
+}
+
+
+
+/*! \defgroup COMMTHRD_OPCODES CommThread Opcodes
+ * \{
+ * \note Only 1 interrupt route can be specified per opcode
+ * \note CallFunc, DisableIntOnEntry, EnableIntOnPoof can be specified in any combination
+ * \note Current support requires that DisableIntOnEntry and EnableIntOnPoof be specified
+ */
+#define COMMTHRD_OPCODE_DISABLE 0x00 /*!< Interrupt route - Not routed / interrupt disabled */
+#define COMMTHRD_OPCODE_CORE0 0x01 /*!< Interrupt route - Dispatched on core0 */
+#define COMMTHRD_OPCODE_CORE1 0x02 /*!< Interrupt route - Dispatched on core1 */
+#define COMMTHRD_OPCODE_CORE2 0x03 /*!< Interrupt route - Dispatched on core2 */
+#define COMMTHRD_OPCODE_CORE3 0x04 /*!< Interrupt route - Dispatched on core3 */
+#define COMMTHRD_OPCODE_BCAST 0x05 /*!< Interrupt route - Dispatched on all cores */
+#define COMMTHRD_OPCODE_ROUTEMASK 0x0F /*!< Interrupt route mask */
+#define COMMTHRD_OPCODE_CALLFUNC 0x10 /*!< The provided function will be called on the comm. thread */
+#define COMMTHRD_OPCODE_DISABLEINTONENTRY 0x20 /*!< Interrupts using cntrid will be disabled when comm. thread is invoked */
+#define COMMTHRD_OPCODE_ENABLEINTONPOOF 0x40 /*!< Interrupts using cntrid will be enabled when comm. thread poofs */
+/*!
+ * \}
+ */
+
/*! \brief Generates an InterruptID value
 * \param[in] group group of the interrupt. range 0-9.
 * \param[in] irq_in_group irq within the group. range 0-31.
 * \return Composite value able to be passed to Kernel_SetCommThreadConfig
 * \see Kernel_SetCommThreadConfig
 * \note Arguments are fully parenthesized so that expression arguments
 *       (e.g. ternaries or bitwise-OR expressions) expand correctly.
 */
#define Kernel_MkInterruptID(group, irq_in_group) (((group)<<5)|((irq_in_group)&0x1f))
+
+/*!
+ * \brief Sets kernel data structures needed to dispatch a communications thread
+ *
+ * Each interrupt on BGP can be used to launch a communications thread. Since access to the
+ * interrupt controller is privileged, the function exposes some interrupt control to the
+ * user application.
+ * \pre Counter must have been allocated via the LockBox_AllocateCounter() routine.
+ * \pre It is recommended that Kernel_DisableInteruptClass() be called twice on the counter
+ * to ensure that the interrupt is disabled until all interrupts for the counter
+ * have been configured.
+ * \pre All
+ * \post After the last call to Kernel_SetCommThreadConfig for the counter, invoke
+ * Kernel_EnableInterruptClass() and Kernel_HardEnableInterruptClass() on
+ * that counter to enable the interrupts for that class.
+ * \see LockBox_AllocateCounter
+ * \see Kernel_DisableInterruptClass
+ * \see Kernel_EnableInterruptClass
+ * \see Kernel_HardEnableInterruptClass
+ * \note An interrupt can only belong to 1 interrupt class (a.k.a., lockbox counter)
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Kernel may prevent changing interrupt settings for certain InterruptID values.
+ * \note If an interrupt fires on a core without a comm. thread, results are not guaranteed.
+ * \return Completion status of the command.
+ * \retval 0 no error occurred
+ * \retval EINVAL invalid parameter
+ * \param[in] InterruptID Identifies a unique interrupt line. \see Kernel_MkInterruptID
+ * \param[in] opcode Specifies what operation to perform when the interrupt occurs. Valid \ref COMMTHRD_OPCODES
+ * \param[in] cntrid ID of the lockbox counter used for interrupt enable/disable control
+ * \param[in] funcptr Function pointer that will be invoked when the interrupt fires
+ * \param[in] arg1 1st argument to the funcptr when the interrupt fires
+ * \param[in] arg2 2nd argument to the funcptr when the interrupt fires
+ * \param[in] arg3 3rd argument to the funcptr when the interrupt fires
+ *
+ */
+__INLINE__ int Kernel_SetCommThreadConfig(int InterruptID, int opcode, LockBox_Counter_t cntrid,
+ Kernel_CommThreadHandler funcptr,
+ uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4)
+{
+ int rc = 0;
+ asm __volatile__ ("li 0,%1;"
+ "mr 3, %2;"
+ "mr 4, %3;"
+ "mr 5, %4;"
+ "mr 6, %5;"
+ "mr 7, %6;"
+ "mr 8, %7;"
+ "mr 9, %8;"
+ "mr 10, %9;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_SETCOMMTHREADCONFIG),
+ "r" (InterruptID),
+ "r" (opcode),
+ "r" (cntrid),
+ "r" (funcptr),
+ "r" (arg1),
+ "r" (arg2),
+ "r" (arg3),
+ "r" (arg4)
+ : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
+ return rc;
+}
+
+/*!
+ * \brief Returns the kernel data structures that were specified to dispatch communication thread
+ *
+ * Each interrupt on BGP can be used to launch a communications thread. Since access to the
+ * interrupt controller is privileged, the function exposes some interrupt control to the
+ * user application.
+ *
+ * \param[in] InterruptID Identifies a unique interrupt line.
+ * \param[out] opcode Storage for opcode value. Specifies which core receives the interrupt. It also controls whether the interrupt disables a class of interrupts. Valid \ref COMMTHRD_OPCODES
+ * \param[out] cntrid Storage for ID of the lockbox counter used for interrupt enable/disable control
+ * \param[out] funcptr Storage for Function pointer that will be invoked when the interrupt fires
+ * \param[out] arg1 Storage for 1st argument to the funcptr when the interrupt fires
+ * \param[out] arg2 Storage for 2nd argument to the funcptr when the interrupt fires
+ * \param[out] arg3 Storage for 3rd argument to the funcptr when the interrupt fires
+ * \return Completion status of the command.
+ * \retval 0 no error occurred
+ * \retval EINVAL invalid parameter
+ *
+ */
+__INLINE__ int Kernel_GetCommThreadConfig(int InterruptID, int* opcode, LockBox_Counter_t* cntrid,
+ Kernel_CommThreadHandler* funcptr,
+ uint32_t* arg1, uint32_t* arg2, uint32_t* arg3, uint32_t* arg4)
+{
+ int rc = 0;
+ asm __volatile__ ("li 0,%1;"
+ "mr 3, %2;"
+ "mr 4, %3;"
+ "mr 5, %4;"
+ "mr 6, %5;"
+ "mr 7, %6;"
+ "mr 8, %7;"
+ "mr 9, %8;"
+ "mr 10, %9;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_GETCOMMTHREADCONFIG),
+ "r" (InterruptID),
+ "r" (opcode),
+ "r" (cntrid),
+ "r" (funcptr),
+ "r" (arg1),
+ "r" (arg2),
+ "r" (arg3),
+ "r" (arg4)
+ : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
+ return rc;
+}
+
+/*! \brief Flush interrupt enable/disable state
+ *
+ * For each interrupt that has a lockbox counter associated with it, this SPI will
+ * update the interrupt controller to match the state specified by the lockbox counter.
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Kernel is responsible for updating the interrupt controller to match all lockbox counters
+ *
+ * \return Completion status of the command.
+ * \retval 0 no error occurred
+ */
+__INLINE__ int Kernel_FlushInterruptState()
+{
+ int rc;
+ asm __volatile__ ("li 0,%1;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_FLUSHINTSTATE)
+ : "r0", "r3", "cc", "memory" );
+ return rc;
+}
+
+/*! \brief Indicates that the kernel should disable the interrupt
+ *
+ * Updates the interrupt class's lockbox to indicate that the kernel should disable the interrupt.
+ * Kernel will disable the interrupt at its leisure, but it should ensure that no communications thread
+ * is invoked for that interrupt class.
+ *
+ * The lockbox values have the following meanings:
+ * 0: Interrupts for this classid are enabled
+ * 1: Interrupts for this classid are logically disabled.
+ * If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt.
+ * 2: Interrupts for this classid are hard-disabled. The interrupt will not disturb the core.
+ *
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Do not disable an already disabled interrupt class.
+ * \note A disabled interrupt class is disabled for all 4 cores, regardless of mode.
+ * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines
+ *
+ */
+__INLINE__ uint32_t Kernel_DisableInterruptClass(LockBox_Counter_t classid)
+{
+ return ( LockBox_FetchAndInc(classid) );
+}
+
+/*! \brief Indicates that the kernel should enable the interrupt
+ *
+ * Updates the interrupt class's lockbox to indicate that the kernel should leave this interrupt enabled.
+ * This does not hard-enable the interrupts for this classid (see Kernel_HardEnableInterruptClass).
+ *
+ * The lockbox values have the following meanings:
+ * 0: Interrupts for this classid are enabled
+ * 1: Interrupts for this classid are logically disabled.
+ * If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt.
+ * 2: Interrupts for this classid are hard-disabled. The interrupt will not disturb the core.
+ *
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note The kernel is responsible for incrementing the lockbox counter when the interrupt is hard-disabled.
+ * \note There is a potential race condition that must be avoided in the kernel. The kernel will need to Query the lockbox when an interrupt occurs, and if it is non-zero, then increment it (another core could enable the interrupt class between those 2 events). One solution is to always FetchAndInc, but that may lead to an extraneous (but rare) FlushInterruptState() call, followed by a FetchAndDec if zero. There are fancier solutions as well.
+ * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines
+ *
+ */
+__INLINE__ uint32_t Kernel_EnableInterruptClass(LockBox_Counter_t classid)
+{
+ return ( LockBox_FetchAndDec(classid) );
+}
+
/*! \brief Indicates that the kernel should hard enable the interrupt
 *
 * Updates the interrupt class's lockbox to indicate that the kernel has hard-enabled this interrupt.
 * If the kernel has actually disabled the interrupt, this SPI will enable the interrupt by using the
 * Kernel_FlushInterruptState() SPI.
 *
 * The lockbox values have the following meanings:
 * 0: Interrupts for this classid are enabled
 * 1: Interrupts for this classid are logically disabled.
 * If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt.
 * 2: Interrupts for this classid are hard-disabled. The interrupt will not disturb the core.
 *
 * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
 * \note The kernel is responsible for incrementing the lockbox counter when the interrupt is disabled.
 * \note There is a potential race condition that must be avoided in the kernel. The kernel will need to Query the lockbox when an interrupt occurs, and if it is non-zero, then increment it (another core could enable the interrupt class between those 2 events). One solution is to always FetchAndInc, but that may lead to an extraneous (but rare) FlushInterruptState() call, followed by a FetchAndDec if zero. There are fancier solutions as well.
 * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines
 *
 */
__INLINE__ void Kernel_HardEnableInterruptClass(LockBox_Counter_t classid)
{
    LockBox_FetchAndDec(classid);  /* mark the class enabled first... */
    Kernel_FlushInterruptState();  /* ...then ask the kernel to re-sync the interrupt controller */
}
+
/*! \brief Delivers an interrupt to the cores specified in the mask
 * \param[in] ipiset Selects the inter-processor-interrupt set to deliver
 *            (passed through to the kernel; exact semantics kernel-defined --
 *            TODO confirm against _BGP_SYSCALL_NR_DELIVERCOMMSIGNAL handler)
 * \param[in] coremask Bitmask describing which processor cores will receive the interrupt. Processor 0 is the least significant bit (1<<0 in C parlance). Processor 3 is 1<<3. Any combination of processors can be interrupted.
 * \note It is possible to interrupt yourself.
 * \return Completion status of the syscall (RC from r3; errno is not used).
 */
__INLINE__ int Kernel_DeliverCommSignal(uint32_t ipiset, uint32_t coremask)
{
    int rc = 0;
    asm __volatile__ ("li 0,%1;"   /* syscall number -> r0 */
                      "mr 3, %2;"  /* args in r3/r4 */
                      "mr 4, %3;"
                      "sc;"
                      "mr %0, 3;"  /* result comes back in r3 */
                      : "=&r" (rc) /* early clobber */
                      : "i" (_BGP_SYSCALL_NR_DELIVERCOMMSIGNAL),
                        "r" (ipiset),
                        "r" (coremask)
                      : "r0", "r3", "r4", "cc", "memory" );
    return rc;
}
+
+/*!
+ * \brief Suspends/Resumes a core
+ *
+ * \param[in] target core ID
+ * \param[in] suspend Boolean. TRUE if core is to be suspended
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ * \note In a threaded application, use care to avoid suspending a thread containing a lock needed by the active thread. (e.g., if the other core is performing a printf, it may have the glibc io subsystem locked with a mutex. If that happens, the main thread may deadlock if it also happens to call printf)
+ *
+ */
+__INLINE__ int Kernel_ChangeCoreEnables(uint32_t target_core, uint32_t suspend)
+{
+ int rc = 0; /* this syscall returns RC in r3 and does not use errno */
+
+ asm __volatile__ ("li 0,%1;"
+ "mr 3,%2;"
+ "mr 4,%3;"
+ "sc;"
+ "mr %0, 3;"
+ : "=&r" (rc) /* early clobber */
+ : "i" (_BGP_SYSCALL_NR_CHGCOREENABLES),
+ "r" (target_core),
+ "r" (suspend)
+ : "r0", "r3", "r4", "cc", "memory" );
+ return( rc );
+}
+
+/*! \brief Persistent Shared Memory interface to application. Currently, simlpy a wrapper to open(2),
+ * with a prefix of /dev/persist
+ */
+__INLINE__ int persist_open( char *name, int oflag, mode_t mode )
+{
+ char pathName[PATH_MAX];
+ strcpy(pathName, "/dev/persist/");
+ strncat(pathName, name, PATH_MAX - strlen("/dev/persist/") - 1);
+ return open(pathName, oflag, mode);
+}
+
/*! \brief Memory region types that can be used for Kernel_GetMemorySize.
*/
enum KERNEL_MEMSIZETYPE
{
    KERNEL_MEMSIZE_SHARED = 200,  /*!< Size in bytes of shared memory */
    KERNEL_MEMSIZE_PERSIST,       /*!< Size in bytes of persistent memory */
    KERNEL_MEMSIZE_HEAPAVAIL,     /*!< Size in bytes of available heap (must be process leader (a.k.a main) thread) */
    KERNEL_MEMSIZE_ESTHEAPAVAIL,  /*!< Estimated size in bytes of the available heap */
    KERNEL_MEMSIZE_STACKAVAIL,    /*!< Size in bytes available to the process leader's stack. (must be process leader (a.k.a. main) thread) */
    KERNEL_MEMSIZE_ESTSTACKAVAIL, /*!< Estimated size in bytes available to the process leader's stack */
    KERNEL_MEMSIZE_STACK,         /*!< Size in bytes of the process leader's stack (must be process leader (a.k.a. main) thread) */
    KERNEL_MEMSIZE_ESTSTACK,      /*!< Estimated size in bytes of the process leader's stack */
    KERNEL_MEMSIZE_HEAP,          /*!< Size in bytes of the heap size*/
    KERNEL_MEMSIZE_GUARD          /*!< Size in bytes of the heap guardpage */
};
+
/*! \brief Returns size of the specified memory region.
 *
 * \param[in] type Memory region being queried (\ref KERNEL_MEMSIZETYPE)
 * \param[out] size Receives the size of the region in bytes
 * \return Completion status of the syscall (RC from r3; errno is not used)
 */

__INLINE__ int Kernel_GetMemorySize(enum KERNEL_MEMSIZETYPE type, uint32_t* size)
{
    int rc = 0; /* this syscall returns RC in r3 and does not use errno */

    asm __volatile__ ("li 0,%1;"   /* syscall number -> r0 */
                      "mr 3,%2;"   /* args in r3/r4 */
                      "mr 4,%3;"
                      "sc;"
                      "mr %0, 3;"  /* result comes back in r3 */
                      : "=&r" (rc) /* early clobber */
                      : "i" (_BGP_SYSCALL_NR_GETMEMSIZE),
                        "r" (type),
                        "r" (size)
                      : "r0", "r3", "r4", "cc", "memory" );
    return( rc );
}
+
/*! \brief Sets a virtual memory window for the process based on a user supplied physical address and tlb slot
 *
 * \param[in] tlbslot TLB slot to install the window in (see Kernel_GetProcessWindowSlotRange)
 * \param[in] window_paddr 36-bit physical base address (split into high/low 32-bit halves for the syscall)
 * \param[in] window_reqsize Requested window size in bytes
 * \param[in] window_permissions Access permissions for the window -- encoding defined by the kernel; TODO confirm
 * \param[out] window_actualvaddr Receives the virtual address actually mapped
 * \param[out] window_actualpaddr Receives the physical address actually mapped
 * \param[out] window_actualsize Receives the size actually mapped
 * \return Completion status of the syscall (RC from r3; errno is not used)
 */

__INLINE__ int Kernel_SetProcessWindow(int tlbslot, uint64_t window_paddr, size_t window_reqsize, uint32_t window_permissions,
                                       uint32_t* window_actualvaddr, uint64_t* window_actualpaddr, size_t* window_actualsize)
{
    int rc = 0; /* this syscall returns RC in r3 and does not use errno */
    /* need to divide up the uint64 so we can setup the uint32 registers */
    uint32_t window_paddr_h = window_paddr >> 32;
    uint32_t window_paddr_l = (uint32_t)window_paddr;

    asm __volatile__ ("li 0,%1;"   /* syscall number -> r0 */
                      "mr 3,%2;"   /* marshal 8 args into r3..r10 */
                      "mr 4,%3;"
                      "mr 5,%4;"
                      "mr 6,%5;"
                      "mr 7,%6;"
                      "mr 8,%7;"
                      "mr 9,%8;"
                      "mr 10,%9;"
                      "sc;"
                      "mr %0, 3;"  /* result comes back in r3 */
                      : "=&r" (rc) /* early clobber */
                      : "i" (_BGP_SYSCALL_NR_SETPRWINDOW),
                        "r" (tlbslot),
                        "r" (window_paddr_h),
                        "r" (window_paddr_l),
                        "r" (window_reqsize),
                        "r" (window_permissions),
                        "r" (window_actualvaddr),
                        "r" (window_actualpaddr),
                        "r" (window_actualsize)
                      : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "cc", "memory" );
    return( rc );
}
+
/*! \brief Returns size of the process memory window that was set by the _SetProcessWindow.
 *
 * \param[in] tlbslot TLB slot previously configured by Kernel_SetProcessWindow
 * \param[out] window_actualvaddr Receives the mapped virtual address
 * \param[out] window_actualpaddr Receives the mapped physical address
 * \param[out] window_actualsize Receives the mapped size in bytes
 * \return Completion status of the syscall (RC from r3; errno is not used)
 */

__INLINE__ int Kernel_GetProcessWindow(int tlbslot,
                                       uint32_t* window_actualvaddr, uint64_t* window_actualpaddr, size_t* window_actualsize)
{
    int rc = 0; /* this syscall returns RC in r3 and does not use errno */

    asm __volatile__ ("li 0,%1;"   /* syscall number -> r0 */
                      "mr 3,%2;"   /* marshal 4 args into r3..r6 */
                      "mr 4,%3;"
                      "mr 5,%4;"
                      "mr 6,%5;"
                      "sc;"
                      "mr %0, 3;"  /* result comes back in r3 */
                      : "=&r" (rc) /* early clobber */
                      : "i" (_BGP_SYSCALL_NR_GETPRWINDOW),
                        "r" (tlbslot),
                        "r" (window_actualvaddr),
                        "r" (window_actualpaddr),
                        "r" (window_actualsize)
                      : "r0", "r3", "r4", "r5", "r6", "cc", "memory" );
    return( rc );
}
+
/*! \brief Returns the range of available TLB slots for use by Kernel_SetProcessWindow
 *
 * \param[out] minslot Receives the lowest usable TLB slot number
 * \param[out] maxslot Receives the highest usable TLB slot number
 * \return Completion status of the syscall (RC from r3; errno is not used)
 */

__INLINE__ int Kernel_GetProcessWindowSlotRange(int *minslot, int *maxslot)
{
    int rc = 0; /* this syscall returns RC in r3 and does not use errno */

    asm __volatile__ ("li 0,%1;"   /* syscall number -> r0 */
                      "mr 3,%2;"   /* args in r3/r4 */
                      "mr 4,%3;"
                      "sc;"
                      "mr %0, 3;"  /* result comes back in r3 */
                      : "=&r" (rc) /* early clobber */
                      : "i" (_BGP_SYSCALL_NR_GETPRWINDOWSLOTS),
                        "r" (minslot),
                        "r" (maxslot)
                      : "r0", "r3", "r4", "cc", "memory" );
    return( rc );
}
+
/*! \brief Returns the number of Active Processes in the node (np adjusted)
 *
 * \return Processor Count
 * \retval 1 one process is active in the node. (SMP, or DUAL/VN with -np restrictions)
 * \retval 2 two processes active in this node (DUAL or VN with -np restrictions)
 * \retval 3 three processes active in this node (VN with -np restrictions)
 * \retval 4 four processes active in this node (VN)
 */
__INLINE__ int Kernel_ActiveProcessCount( void )
{
    int rc = 0; /* this syscall returns RC in r3 and does not use errno */

    asm __volatile__ ("li 0,%1;"   /* syscall number -> r0 */
                      "sc;"
                      "mr %0, 3;"  /* count comes back in r3 */
                      : "=&r" (rc) /* early clobber */
                      : "i" (_BGP_SYSCALL_NR_ACTIVEPROCESSCOUNT)
                      : "r0", "r3", "cc", "memory" );
    return( rc );
}
+
+
#if SPI_DEPRECATED
/* Backward-compatibility aliases for the old rts_/BGP_ SPI names. */

/*! \see Kernel_PhysicalProcessorID */
#define BGP_PhysicalProcessorID Kernel_PhysicalProcessorID

/*! \see Kernel_Virtual2Physical */
#define _bgp_Virtual2Physical Kernel_Virtual2Physical

/*! \see Kernel_GetPersonality */
#define rts_get_personality(p,s) Kernel_GetPersonality(p,s)

/*! \see Kernel_PhysicalProcessorID */
#define rts_get_processor_id() Kernel_PhysicalProcessorID()

/*! \see Kernel_GetProcessorVersion */
#define rts_get_processor_version() Kernel_GetProcessorVersion()
#endif
+
+__END_DECLS
+
+
+#endif /* ! __LINUX__ */
+
+
+#endif /* Add nothing below this line */
diff --git a/arch/powerpc/include/spi/linux_interface.h b/arch/powerpc/include/spi/linux_interface.h
new file mode 100644
index 00000000000000..bb49e1754a4036
--- /dev/null
+++ b/arch/powerpc/include/spi/linux_interface.h
@@ -0,0 +1,777 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+
+#ifndef _BGP_SPI_LINUX_INTERFACE_H_ /* Prevent multiple inclusion */
+#define _BGP_SPI_LINUX_INTERFACE_H_
+
+
/*! \brief Returns the physical processor ID of the running PPC450 core.
 *
 * \return Physical processor ID
 * \retval 0 Running on processor 0
 * \retval 1 Running on processor 1
 * \retval 2 Running on processor 2
 * \retval 3 Running on processor 3
 * \note Stub: this Linux-interface version always returns 0 (the "??????"
 * marker below appears to flag it as a placeholder -- TODO provide a real
 * implementation).
 * \note(review) "extern inline" assumes gnu89 inline semantics (no external
 * definition emitted) -- confirm compiler flags before changing.
 */
extern inline uint32_t Kernel_PhysicalProcessorID( void ) { return 0; } /* ?????? */
+
+
+/*! \brief Causes a commthread to disappear from the runqueue
+ *
+ * \note Kernel does not guarantee that the instruction pointer, stack pointer, and register state are preserved across a poof.
+ * \note TLS data is preserved across a poof
+ * \note This SPI is only executable on a comm. thread.
+ * \warning non-portable pthread API
+ * \return error indication
+ * \retval success Does not return. Thread has "poofed"
+ * \retval -1 Calling thread is not a CommThread, so cannot poof
+ */
+int pthread_poof_np( void );
+
+
+
+
+/*!
+ * \brief Clears the Full Reception FIFO (DD1 workaround)
+ *
+ * This function exists to reset the DMA reception fifos - it is a workaround for DD1 only. It should not be needed in DD2.
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+int Kernel_ClearFullReceptionFifo(void);
+
+
/*! \brief Generates an InterruptID value
 * \param[in] group group of the interrupt. range 0-9.
 * \param[in] irq_in_group irq within the group. range 0-31.
 * \return Composite value able to be passed to Kernel_SetCommThreadConfig
 * \see Kernel_SetCommThreadConfig
 * \note Arguments are fully parenthesized so that expression arguments
 *       (e.g. ternaries or bitwise-OR expressions) expand correctly.
 */
#define Kernel_MkInterruptID(group, irq_in_group) (((group)<<5)|((irq_in_group)&0x1f))
+
+
+/*!
+ * \brief Communication Thread interrupt handler function prototype
+ *
+ * \param[in] arg1 1st argument to commthread
+ * \param[in] arg2 2nd argument to commthread
+ * \param[in] arg3 3rd argument to commthread
+ */
+typedef void (*Kernel_CommThreadHandler)(uint32_t arg1, uint32_t arg2, uint32_t arg3, uint32_t arg4);
+
+/*!
+ * \brief Interrupt Group Prototype
+ *
+ * This data type is used to group interrupts of various devices together
+ * so they can be enabled or disabled simultaneously. A given interrupt user
+ * (eg. messaging, QCD, etc) specifies a value of this data type when its
+ * interrupt resources are allocated. The kernel associates those resources
+ * with the specified value so when this value is specified on the enable or
+ * disable interrupts system call, all of the interrupts in the group are
+ * operated upon. Examples of devices that can be grouped in this way include
+ * DMA fifos, torus, tree, etc.
+ *
+ * \todo The kernel should provide interfaces to allocate a
+ * Kernel_InterruptGroup_t and deallocate it.
+ */
+typedef void * Kernel_InterruptGroup_t;
+
+
/*! \defgroup COMMTHRD_OPCODES CommThread Opcodes
 * \{
 * \note Only 1 interrupt route can be specified per opcode
 * \note CallFunc, DisableIntOnEntry, EnableIntOnPoof can be specified in any combination
 * \note Current support requires that DisableIntOnEntry and EnableIntOnPoof be specified
 */
#define COMMTHRD_OPCODE_DISABLE 0x00 /*!< Interrupt route - Not routed / interrupt disabled */
#define COMMTHRD_OPCODE_CORE0 0x01 /*!< Interrupt route - Dispatched on core0 */
#define COMMTHRD_OPCODE_CORE1 0x02 /*!< Interrupt route - Dispatched on core1 */
#define COMMTHRD_OPCODE_CORE2 0x03 /*!< Interrupt route - Dispatched on core2 */
#define COMMTHRD_OPCODE_CORE3 0x04 /*!< Interrupt route - Dispatched on core3 */
#define COMMTHRD_OPCODE_BCAST 0x05 /*!< Interrupt route - Dispatched on all cores */
#define COMMTHRD_OPCODE_ROUTEMASK 0x0F /*!< Interrupt route mask */
#define COMMTHRD_OPCODE_CALLFUNC 0x10 /*!< The provided function will be called on the comm. thread */
#define COMMTHRD_OPCODE_DISABLEINTONENTRY 0x20 /*!< Interrupts using cntrid will be disabled when comm. thread is invoked */
#define COMMTHRD_OPCODE_ENABLEINTONPOOF 0x40 /*!< Interrupts using cntrid will be enabled when comm. thread poofs */
+
+
+/*!
+ * \brief Sets kernel data structures needed to dispatch a communications thread
+ *
+ * Each interrupt on BGP can be used to launch a communications thread. Since access to the
+ * interrupt controller is privileged, the function exposes some interrupt control to the
+ * user application.
+ * \pre Counter must have been allocated via the LockBox_AllocateCounter() routine.
+ * \pre It is recommended that Kernel_DisableInteruptClass() be called twice on the counter
+ * to ensure that the interrupt is disabled until all interrupts for the counter
+ * have been configured.
+ * \pre All
+ * \post After the last call to Kernel_SetCommThreadConfig for the counter, invoke
+ * Kernel_EnableInterruptClass() and Kernel_HardEnableInterruptClass() on
+ * that counter to enable the interrupts for that class.
+ * \see LockBox_AllocateCounter
+ * \see Kernel_DisableInterruptClass
+ * \see Kernel_EnableInterruptClass
+ * \see Kernel_HardEnableInterruptClass
+ * \note An interrupt can only belong to 1 interrupt class (a.k.a., lockbox counter)
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Kernel may prevent changing interrupt settings for certain InterruptID values.
+ * \note If an interrupt fires on a core without a comm. thread, results are not guaranteed.
+ * \return Completion status of the command.
+ * \retval 0 no error occurred
+ * \retval EINVAL invalid parameter
+ * \param[in] InterruptID Identifies a unique interrupt line. \see Kernel_MkInterruptID
+ * \param[in] opcode Specifies what operation to perform when the interrupt occurs. Valid \ref COMMTHRD_OPCODES
+ * \param[in] cntrid ID of the lockbox counter used for interrupt enable/disable control
+ * \param[in] funcptr Function pointer that will be invoked when the interrupt fires
+ * \param[in] arg1 1st argument to the funcptr when the interrupt fires
+ * \param[in] arg2 2nd argument to the funcptr when the interrupt fires
+ * \param[in] arg3 3rd argument to the funcptr when the interrupt fires
+ *
+ */
+typedef uint32_t* LockBox_Counter_t; /*!< Counter ID definition */
+int Kernel_SetCommThreadConfig(int InterruptID,
+ int opcode,
+ LockBox_Counter_t cntrid,
+ Kernel_CommThreadHandler funcptr,
+ uint32_t arg1,
+ uint32_t arg2,
+ uint32_t arg3,
+ uint32_t arg4);
+
+
+
+/*! \brief Indicates that the kernel should disable the interrupt
+ *
+ * Updates the interrupt class's lockbox to indicate that the kernel should disable the interrupt.
+ * Kernel will disable the interrupt at its leisure, but it should ensure that no communications thread
+ * is invoked for that interrupt class.
+ *
+ * The lockbox values have the following meanings:
+ * 0: Interrupts for this classid are enabled
+ * 1: Interrupts for this classid are logically disabled.
+ * If an interrupt occurs, the kernel will hard-disable them and ignore the interrupt.
+ * 2: Interrupts for this classid are hard-disabled. The interrupt will not disturb the core.
+ *
+ * \note The effects of this function span the entire node regardless of SMP, Dual, or VNM settings.
+ * \note Do not disable an already disabled interrupt class.
+ * \note A disabled interrupt class is disabled for all 4 cores, regardless of mode.
+ * \param[in] classid An allocated lockbox that is being used to control a set of interrupt enable/disable lines
+ *
+ */
+uint32_t Kernel_DisableInterruptClass(LockBox_Counter_t classid);
+
+
+
+/*!
+ * \brief Enables/Disables the counter overflow/underflow interrupts
+ *
+ * This function is a wrapper around a system call that can enable or disable the 4 counter overflow/underflow interrupts
+ *
+ * \param[in] enable/disable boolean
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+int Kernel_ChgCounterInterruptEnables(uint32_t enable);
+
+
+/* int rts_get_personality( void * pers, size_t size ); */
+
+
+/*!
+ * \brief Update mapping info about physically contiguous application memory regions
+ * ( used only in HPC mode )
+ */
+int Kernel_UpdateAppSegmentInfo(void);
+
+
+
+/*!
+ * \brief Internal helper function for virtual to physical address translation
+ *
+ */
+
+int User_Virtual2Physical(unsigned long vaddr, /* 32bit Virtual start address */
+ size_t vsize, /* size in bytes of virtual range */
+ uint32_t *ua_out, /* upper 4 Physical Address bits */
+ uint32_t *pa_out );
+
+
+/*! \brief Translate a 32bit Virtual Address to a 36bit Physical Address, returning separated upper and lower parts.
+ *
+ * \param[in] pVA 32bit virtual address in the calling process
+ * \param[in] vsize size in bytes of the virtual range
+ * \param[out] ua_out upper 4 physical address bits
+ * \param[out] pa_out lower 32 physical address bits
+ * \return Error condition for translation
+ * \retval 0 Successful translation, with ua_out and pa_out filled in
+ * \retval -1 Invalid Virtual Address for this process, ua_out and pa_out unmodified.
+ * \retval -2 The range from vaddr to (vaddr+vsize) is not physically contiguous.
+ * \retval -3 vaddr in Scratch, but no Scratch, or not enough Scratch, is enabled.
+ * \retval -4 invalid parameter
+ *
+ * \warning Supports only Text, Data, Stack, and (optional) eDRAM Scratch translation
+ * \warning CNK "pagesize" is 1MB.
+ * \warning Text and Data are virtually contiguous, but not necessarily physically contiguous.
+ * \todo Does not (currently) support > 4GB DDR space.
+ * \todo Does not (currently) support Shared Memory Area.
+ */
+int Kernel_Virtual2Physical( void *pVA, /* input: 32bit Virtual start address */
+ size_t vsize, /* input: size in bytes of virtual range */
+ uint32_t *ua_out, /* output: upper 4 Physical Address bits */
+ uint32_t *pa_out ); /* output: lower 32 Physical Address bits */
+
+
+/*!
+ * \brief Query Free DMA Counter Subgroups within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available) subgroups within the specified group.
+ *
+ * \param[in] type Specifies whether this is an injection or
+ * reception counter group (DMA_Type_Injection
+ * or DMA_Type_Reception)
+ * \param[in] grp Group number being queried (0 to
+ * DMA_NUM_COUNTER_GROUPS-1)
+ * \param[out] num_subgroups Pointer to an int where the number of free
+ * subgroups in the specified group is returned
+ * \param[out] subgroups Pointer to an array of num_subgroups ints where
+ * the list of num_subgroups subgroups is returned.
+ * Each int is the subgroup number
+ * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1). The
+ * caller must provide space for
+ * DMA_NUM_COUNTERS_PER_SUBGROUP ints, in case the
+ * entire counter group is free.
+ *
+ * \retval 0 Successful. num_subgroups and subgroups array set as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupQueryFree()
+ * \note The kernel may need to synchronize with other cores performing
+ * allocate or free syscalls.
+ *
+ */
+uint32_t Kernel_CounterGroupQueryFree(uint32_t type,
+ uint32_t group,
+ uint32_t * num_subgroups,
+ uint32_t * subgroups);
+
+
+/*!
+ * \brief Allocate DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that allocates DMA counters
+ * from the specified group. Counters may be allocated in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters. Parameters specify how interrupts,
+ * generated when a counter hits zero, are to be handled. A
+ * DMA_CounterGroup_t structure is returned for use in other inline
+ * functions to operate on the allocated counters.
+ *
+ * \param[in] type Specifies whether this is an injection or
+ * reception counter group (DMA_Type_Injection
+ * or DMA_Type_Reception)
+ * \param[in] grp Group number whose counters are being allocated
+ * (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in] num_subgroups Number of subgroups to be allocated from the group
+ * (1 to DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in] subgroups Pointer to an array of num_subgroups ints where
+ * the list of subgroups to be allocated is provided.
+ * Each int is the subgroup number
+ * (0 to num_subgroups-1).
+ * \param[in] target The core that will receive the interrupt when a
+ * counter in this allocation hits zero
+ * (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in] handler A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * counter in this allocation hits zero. This
+ * function must be coded to take 3 parameters:
+ * void* A pointer to storage specific to this
+ * handler. This is the handler_parm
+ * specified on this allocation function.
+ * int The counter's subgroup number (0 to
+ * DMA_NUM_COUNTER_SUBGROUPS-1).
+ * Note this number spans all groups.
+ * If handler is NULL, hit-zero interrupts will not
+ * be enabled for these counters.
+ * \param[in] handler_parm A pointer to storage that should be passed to the
+ * interrupt handling function (see handler
+ * parameter)
+ * \param[in] interruptGroup A InterruptGroup_t that identifies the
+ * group of interrupts that the counters being
+ * allocated will become part of.
+ * \param[out] cg_ptr Pointer to a structure that is filled in upon
+ * successful return for use in other inline
+ * functions to operate on the allocated counters.
+ * \li counter - Array of software counter
+ * structures. Each element
+ * points to the corresponding
+ * hardware counter in DMA SRAM.
+ * Pointers are null if not
+ * allocated).
+ * Counters are initialized to
+ * DMA_COUNTER_INIT_VAL,
+ * disabled, their hit_zero bit
+ * is off, base and max are NULL.
+ * \li status_ptr - Points to status area within the
+ * DMA memory map.
+ * \li permissions - Bits set for each allocated
+ * counter
+ * \li grp_permissions - Permissions for each
+ * subgroup
+ * \li group_id - The group number
+ * \li type - The type of DMA (injection or
+ * reception)
+ *
+ * \retval 0 Successful. Counters allocated and cg_ptr structure filled in as
+ * described.
+ * \retval -1 Unsuccessful. errno gives the reason. Nothing has been
+ * allocated.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupAllocate()
+ * \note The kernel may need to synchronize with other cores performing queries
+ * or frees.
+ *
+ */
+uint32_t Kernel_CounterGroupAllocate(uint32_t type,
+ uint32_t group,
+ uint32_t num_subgroups,
+ uint32_t * subgroups,
+ uint32_t target,
+ uint32_t handler,
+ uint32_t * handler_parm,
+ uint32_t interruptGroup,
+ uint32_t * cg_ptr);
+
+
+/*!
+ * \brief Free DMA Counters From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA counters
+ * from the specified group. Counters may be freed in subgroups of
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters.
+ *
+ * \param[in] grp Group number whose counters are being freed
+ * (0 to DMA_NUM_COUNTER_GROUPS-1)
+ * \param[in] num_subgroups Number of subgroups to be freed from the group
+ * (1-DMA_NUM_COUNTERS_PER_SUBGROUP)
+ * \param[in] subgroups Pointer to an array of num_subgroups ints where
+ * the list of subgroups to be freed is provided.
+ * Each int is the subgroup number
+ * (0 to DMA_NUM_COUNTERS_PER_SUBGROUP-1).
+ * \param[out] cg_ptr Pointer to the structure previously filled in when
+ * these counters were allocated. Upon successful
+ * return, this structure is updated to reflect the
+ * freed counters:
+ * \li counter[] - Counter structures Pointers to
+ * freed counters nulled.
+ * \li permissions - Bits cleared for each freed
+ * counter.
+ *
+ * \retval 0 Successful. Counters freed and cg_ptr structure updated as
+ * described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_CounterGroupFree()
+ * \note The kernel may need to synchronize with other cores performing allocates
+ * or queries.
+ */
+uint32_t Kernel_CounterGroupFree( uint32_t group,
+ uint32_t num_subgroups,
+ uint32_t * subgroups,
+ uint32_t * cg_ptr );
+
+
+/*!
+ * \brief Query Free DMA InjFifos within a Group
+ *
+ * This function is a wrapper around a system call that returns a list of the
+ * free (available to be allocated) fifos within the specified group.
+ *
+ * \param[in] grp Group number being queried
+ *                        (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[out] num_fifos Pointer to an int where the number of free
+ * fifos in the specified group is returned
+ * \param[out] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of free fifos is returned.
+ * Each int is the fifo number
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * The caller must provide space for
+ * DMA_NUM_INJ_FIFOS_PER_GROUP ints,
+ * in case the entire fifo group is free.
+ *
+ * \retval 0 Successful. num_fifos and fifo_ids array set as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupQueryFree()
+ */
+
+uint32_t Kernel_InjFifoGroupQueryFree( uint32_t group,
+ uint32_t * num_fifos,
+ uint32_t * fifo_ids);
+
+
+/*!
+ * \brief Allocate DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that allocates specified
+ * DMA injection fifos from the specified group. Parameters specify whether
+ * each fifo is high or normal priority, local or non-local, and which torus
+ * fifos it maps to. A DMA_InjFifoGroup_t structure is returned for
+ * use in other inline functions to operate on the allocated fifos.
+ *
+ * Refer to the interrupt discussion at the top of this include file to see why
+ * there are no interrupt-related parameters.
+ *
+ * \param[in] grp Group number whose DMA injection fifos are being
+ * allocated (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in] num_fifos Number of fifos to be allocated from the group
+ * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of fifos to be allocated is provided.
+ *                        Each int is the fifo number (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] priorities Pointer to an array of num_fifos short ints where
+ * the list of priorities to be assigned to the fifos
+ * is provided. Each short int indicates the priority
+ * to be assigned to each of the fifos identified in
+ * the fifo_ids array (0 is normal, 1 is high priority).
+ * \param[in] locals Pointer to an array of num_fifos short ints where
+ * an indication is provided of whether each fifo will
+ * be used for local transfers (within the same node)
+ * or torus transfers. Each short int indicates the
+ * local/non-local attribute to be assigned to each of
+ * the fifos identified in the fifo_ids array (0 is
+ * non-local, 1 is local). If 0, the corresponding
+ * array element in ts_inj_maps indicates which torus
+ * fifos can be injected.
+ * \param[in] ts_inj_maps Pointer to an array of num_fifos short ints where
+ * the torus fifos that can be injected are specified
+ * for each fifo. Each short int specifies which of
+ * the 8 torus injection fifos can be injected when a
+ * descriptor is injected into the DMA injection fifo.
+ * Must be non-zero when the corresponding "locals"
+ * is 0.
+ * \param[out] fg_ptr Pointer to a structure that is filled in upon
+ * successful return for use in other inline functions
+ * to operate on the allocated fifos.
+ * \li fifos - Array of fifo structures. Structures
+ * for allocated fifos are initialized as
+ * documented below. Structures for
+ * fifos not allocated by this instance of
+ * this syscall are initialized to binary
+ * zeros. Allocated fifos are enabled.
+ * \li status_ptr - Points to status area within the
+ * DMA memory map.
+ * \li permissions - Bits indicating which fifos were
+ * allocated during this syscall.
+ * \li group_id - The id of this group.
+ *
+ * \retval 0 Successful. Fifos allocated and fg_ptr structure filled in as
+ * described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupAllocate()
+ * \return The group fifo structure pointed to by fg_ptr is completely
+ * initialized as follows:
+ * - status_ptr points to the appropriate fifo group DMA memory map
+ * - fifo structures array. Fifo structures for fifos not allocated
+ * during this syscall are initialized to binary zeros. Fifo
+ * structures for fifos allocated during this syscall are initialized:
+ * - fifo_hw_ptr points to the DMA memory map for this fifo. The
+ * hardware start, end, head, and tail are set to zero by the
+ * kernel.
+ * - All other fields in the structure are set to zero by the kernel
+ * except priority, local, and ts_inj_map are set to reflect what
+ * was requested in the priorities, locals, and ts_inj_maps
+ * syscall parameters.
+ *
+ */
+uint32_t Kernel_InjFifoGroupAllocate( uint32_t group,
+ uint32_t num_fifos,
+ uint32_t * fifo_ids,
+ uint16_t * priorities,
+ uint16_t * locals,
+ uint8_t * ts_inj_maps,
+ uint32_t * fg_ptr );
+
+
+
+/*!
+ * \brief Free DMA InjFifos From A Group
+ *
+ * This function is a wrapper around a system call that frees DMA injection
+ * fifos from the specified group.
+ *
+ * \param[in] grp Group number whose DMA injection fifos are being
+ * freed (0 to DMA_NUM_INJ_FIFO_GROUPS-1)
+ * \param[in] num_fifos Number of fifos to be freed from the group
+ * (1 to DMA_NUM_INJ_FIFOS_PER_GROUP)
+ * \param[in] fifo_ids Pointer to an array of num_fifos ints where
+ * the list of fifos to be freed is provided.
+ *                        Each int is the fifo number (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] fg_ptr Pointer to the structure previously filled in when
+ * these fifos were allocated. Upon successful
+ * return, this structure is updated to reflect the
+ * freed fifos:
+ * \li fifos - Structures for freed fifos zero'd.
+ * Freed fifos are disabled.
+ * \li permissions - Bits cleared for each freed fifo.
+ *
+ * \retval 0 Successful. Fifos freed and fg_ptr structure updated as described.
+ * \retval -1 Unsuccessful. errno gives the reason.
+ *
+ * \internal This function is not intended to be called directly
+ * \see DMA_InjFifoGroupFree()
+ * \note This is a fatal error if any of the fifos are non empty and activated
+ *
+ */
+uint32_t Kernel_InjFifoGroupFree(uint32_t group,
+ uint32_t num_fifos,
+ uint32_t * fifo_ids,
+ uint32_t * fg_ptr);
+
+
+
+/*!
+ * \brief DMA InjFifo Initialization By Id
+ *
+ * - For an allocated injection DMA fifo, initialize its start, head, tail, and
+ * end.
+ * - Compute fifo size and free space.
+ * - Initialize wrap count.
+ * - Activate the fifo.
+ *
+ * \param[in] fg_ptr Pointer to fifo group structure.
+ * \param[in] fifo_id Id of the fifo to be initialized
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ * \param[in] va_start Virtual address of the start of the fifo.
+ * \param[in] va_head Virtual address of the head of the fifo (typically
+ * equal to va_start).
+ * \param[in] va_end Virtual address of the end of the fifo.
+ *
+ * \retval 0 Successful.
+ * \retval -1 Unsuccessful. Error checks include
+ * - va_start < va_end
+ * - va_start <= va_head <=
+ * (va_end - DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ * - va_start and va_end are 32-byte aligned
+ * - fifo_size is larger than (DMA_MIN_INJECT_SIZE_IN_QUADS +
+ * DMA_FIFO_DESCRIPTOR_SIZE_IN_QUADS)
+ *
+ */
+uint32_t Kernel_InjFifoInitById(uint32_t * fg_ptr,
+ int fifo_id,
+ uint32_t * va_start,
+ uint32_t * va_head,
+ uint32_t * va_end);
+
+
+
+/*!
+ * \brief Set DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that
+ * - Sets DCRs establishing the map between the hardware torus fifos and the
+ * DMA reception fifos that are to receive the packets from those hardware
+ * torus fifos.
+ * - Sets DCRs establishing the DMA reception fifos that are to receive
+ * local transfer packets.
+ * - Sets the DCRs establishing the type (0 or 1) of each reception fifo.
+ * - Sets the DCRs establishing the threshold for type 0 and 1 reception fifos.
+ * - Leaves all of the fifos that are used in a "disabled" state.
+ * DMA_RecFifoInitById() initializes and enables the fifos.
+ *
+ * \param[in] rec_map Reception Fifo Map structure, defining the mapping.
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ * \internal This is an internal syscall
+ * \see DMA_RecFifoSetMap
+ * \note This function should be called once per job, after DMA_ResetRelease().
+ * It may be called by any core, but once a core has called it, other
+ * calls by that same core or any other core will fail.
+ *
+ * \note During job init, the kernel sets up the DCR clear masks for each
+ * reception fifo group (DCRs 0xD68 - 0xD6C) such that a write to clear
+ * a fifo in group g only clears group g.
+ *
+ */
+int Kernel_RecFifoSetMap(uint32_t* rec_map);
+
+
+/*!
+ * \brief Get DMA Reception Fifo Map
+ *
+ * This function is a wrapper around a system call that returns a DMA
+ * reception fifo map structure, filled in according to the DCRs.
+ *
+ * \param[in,out] rec_map A pointer to a Reception Fifo Map structure
+ * that will be filled-in upon return.
+ *
+ * \retval 0 Successful
+ * \retval error_value An error value defined in the _BGP_RAS_DMA_ErrCodes
+ * enum located in bgp/arch/include/common/bgp_ras.h
+ *
+ */
+int Kernel_RecFifoGetMap(uint32_t* rec_map);
+
+
+
+/*!
+ * \brief Get DMA Reception Fifo Group
+ *
+ * This is a wrapper around a System Call. This function returns THE
+ * one-and-only pointer to the fifo group structure, with the entries all
+ * filled in from info in the DCRs. If called multiple times with the same
+ * group, it will always return the same pointer, and the system call will
+ * not be invoked again.
+ *
+ * It must be called AFTER DMA_RecFifoSetMap().
+ *
+ * By convention, the same "target" is used for normal and header fifo
+ * interrupts (could be changed). In addition, by convention, interrupts for
+ * fifos in group g come out of the DMA as non-fatal irq bit 28+g,
+ * ie, only fifos in group g can cause the "type g" threshold interrupts.
+ *
+ * \param[in] grp  The group number (0 through DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] target The core that will receive the interrupt when a
+ * fifo in this group reaches its threshold
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * Ignored on subsequent call with the same group.
+ * \param[in] normal_handler A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * normal fifo in this group reaches its threshold.
+ * This function must be coded to take 2 parameters:
+ * void* A pointer to storage specific to this
+ * handler. This is the normal_handler_parm
+ * specified on this function call.
+ * int The global fifo ID of the fifo that hit
+ * its threshold (0 through
+ * NUM_DMA_REC_FIFOS-1).
+ * If normal_handler is NULL, threshold interrupts
+ * are not delivered for normal fifos in this group.
+ * Ignored on subsequent call with the same group.
+ * \param[in] normal_handler_parm A pointer to storage that should be passed
+ * to the normal interrupt handling function
+ * (see normal_handler parameter).
+ * Ignored on subsequent call with the same
+ * group.
+ * \param[in] header_handler A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * header fifo in this group reaches its threshold.
+ * This function must be coded to take 2 parameters:
+ * void* A pointer to storage specific to this
+ * handler. This is the header_handler_parm
+ * specified on this function call.
+ * int The global fifo ID of the fifo that hit
+ * its threshold (0 through
+ * NUM_DMA_REC_FIFOS-1).
+ * If header_handler is NULL, threshold interrupts
+ * are not delivered for header fifos in this group.
+ * Ignored on subsequent call with the same group.
+ * \param[in] header_handler_parm A pointer to storage that should be passed
+ * to the header interrupt handling function
+ * (see header_handler parameter).
+ * Ignored on subsequent call with the same
+ * group.
+ * \param[in] interruptGroup  An InterruptGroup_t that identifies the
+ * group of interrupts that the fifos in this group
+ * will become part of.
+ * Ignored on subsequent call with the same group.
+ *
+ * \return RecFifoGroupStruct Pointer to a DMA Reception Fifo Group structure
+ * that reflects the fifos that are being used in
+ * this group. This same structure is shared by
+ * all users of this reception fifo group.
+ * NULL is returned if an error occurs.
+ *
+ * \note The following comments from Phil about the internals of the syscall:
+ * - error checks
+ * - 0 <= group_id < 4
+ * - the start of the fifo group is a valid virtual address (tlb mapped)?
+ * - disable the rDMA
+ * - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information
+ * - loop through the map to determine how many and which fifos in this group
+ * are used, including headers
+ * - filling in the addresses of used fifos
+ * - In particular, any pointer to any fifo in the group that is not used
+ * will have a null pointer
+ * - furthermore,
+ * - write starting values to all used fifos
+ * - make sure all interrupts are cleared
+ * - enable rDMA
+ *
+ */
+int Kernel_RecFifoGetFifoGroup(
+ uint32_t * fifogroup,
+ int grp,
+ int target,
+ void * normal_handler,
+ void * normal_handler_parm,
+ void * header_handler,
+ void * header_handler_parm,
+ void * interruptGroup
+ );
+
+
+
+/*!
+ * \brief DMA RecFifo Initialization By Id
+ *
+ * - For a DMA reception fifo, initialize its start, head, tail, and end.
+ * - Compute fifo size and free space.
+ *
+ * \param[in] fg_ptr Pointer to fifo group structure.
+ * \param[in] fifo_id Id of the fifo to be initialized
+ * (0 to DMA_NUM_REC_FIFOS_PER_GROUP-1).
+ * \param[in] va_start Virtual address of the start of the fifo.
+ * \param[in] va_head Virtual address of the head of the fifo (typically
+ * equal to va_start).
+ * \param[in] va_end Virtual address of the end of the fifo.
+ *
+ * \retval 0 Successful.
+ * \retval -1 Unsuccessful. Error checks include
+ * - va_start < va_end
+ * - va_start <= va_head < va_end
+ * - va_start and va_end are 32-byte aligned
+ * - fifo_size >= DMA_MIN_REC_FIFO_SIZE_IN_BYTES
+ *
+ */
+int Kernel_RecFifoInitById( uint32_t * fg_ptr,
+ int fifo_id,
+ void * va_start,
+ void * va_head,
+ void * va_end );
+
+
+
+
+#endif /* Add nothing below this line */
diff --git a/arch/powerpc/include/spi/linux_kernel_spi.h b/arch/powerpc/include/spi/linux_kernel_spi.h
new file mode 100644
index 00000000000000..05d32f88d8cb2b
--- /dev/null
+++ b/arch/powerpc/include/spi/linux_kernel_spi.h
@@ -0,0 +1,113 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+
+
+#ifndef _LINUX_KERNEL_SPI_H_ /* Prevent multiple inclusion */
+#define _LINUX_KERNEL_SPI_H_
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include <common/bgp_personality.h>
+
+#ifndef __LINUX_KERNEL__
+#define __LINUX_KERNEL__
+#endif
+
+#ifndef __BGP_HIDE_STANDARD_TYPES__
+#define __BGP_HIDE_STANDARD_TYPES__
+#endif
+
+
+/* this comes from src/arch/ppc/platforms/4xx/bluegene.c */
+extern int bluegene_getPersonality(void *buf, int bufsize);
+#define rts_get_personality(p,s) bluegene_getPersonality(p,s)
+
+
+/* Lockbox used by DMA_InjFifoRgetFifoFullInit ... */
+#define LockBox_FetchAndClear(x)
+
+
+/* asm inlines used by dma spi */
+
+#define _bgp_msync(void) asm volatile ("msync" : : : "memory")
+#define _bgp_mbar(void) asm volatile ("mbar" : : : "memory")
+#define _bgp_isync(void) asm volatile ("isync" : : : "memory")
+extern inline void _bgp_msync_nonspeculative( void )
+{
+ do {
+ asm volatile (" b 1f;"
+ " nop;"
+ "1: msync;"
+ : : : "memory");
+ }
+ while(0);
+}
+
+#define _bgp_QuadLoad(v,f) asm volatile( "lfpdx " #f ",0,%0" :: "r" (v) : "fr" #f )
+#define _bgp_QuadStore(v,f) asm volatile( "stfpdx " #f ",0,%0" :: "r" (v) : "memory" )
+
+#define _bgp_dcache_touch_line(v) do { asm volatile ("dcbt 0,%0" : : "r" (v)); } while(0)
+
+/* in ppc450_inlines.h */
+/* #define _bgp_msync_nonspeculative(x) */
+/* { */
+/* do { */
+/* asm volatile (" b 1f;" */
+/* " nop;" */
+/* "1: msync;" */
+/* : : : "memory"); */
+/* } */
+/* while(0); */
+/* } */
+
+/* assert and printf variants for kernel use */
+
+#define assert(x) if ( !(x)) printk( KERN_ALERT "(E) bgpdma assertion at %s:%d\n",__FILE__,__LINE__);
+
+#define SPI_assert(x) assert(x)
+
+#define printf(...) printk(KERN_INFO __VA_ARGS__)
+
+/* we need a dummy errno for linking */
+static int errno;
+
+/* general bgp quad struct */
+/* (better one in bgp_types.h , use that in preference) */
+/* typedef struct { u32 w[4]; } __attribute__ ((aligned(16))) _bgp_QuadWord_t; */
+
+
+/* virtual base address of the DMA (see bgp_dma_memap.h) */
+#define _BGP_VA_DMA bgpdma_kaddr
+
+#include <asm/bgp_personality.h>
+#include <common/alignment.h>
+#include <bpcore/bgp_dma_memmap.h>
+#include <bpcore/ic_memmap.h>
+
+#include <spi/DMA_Counter.h>
+#include <spi/DMA_Fifo.h>
+#include <spi/DMA_InjFifo.h>
+#include <spi/DMA_RecFifo.h>
+
+
+
+#endif
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 8d1a419df35d78..14037caa20c0a9 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -4,6 +4,8 @@
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+EXTRA_CFLAGS =
+
ifeq ($(CONFIG_PPC64),y)
CFLAGS_prom_init.o += -mno-minimal-toc
endif
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 923f87aff20a45..65c67c44d830e0 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1634,6 +1634,19 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_440A,
.platform = "ppc440",
},
+ { /* Blue Gene/P */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x52131880,
+ .cpu_name = "450 Blue Gene/P",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
{ /* default match */
.pvr_mask = 0x00000000,
.pvr_value = 0x00000000,
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index a088c064ae4055..33e2edd41d79eb 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -29,7 +29,7 @@
BEGIN_FTR_SECTION \
b 2f; \
END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
- REST_32FPRS(n,base); \
+ REST_32FPRS(n,c,base); \
b 3f; \
2: REST_32VSRS(n,c,base); \
3:
@@ -38,13 +38,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
BEGIN_FTR_SECTION \
b 2f; \
END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
- SAVE_32FPRS(n,base); \
+ SAVE_32FPRS(n,c,base); \
b 3f; \
2: SAVE_32VSRS(n,c,base); \
3:
#else
-#define REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
-#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
+#define REST_32FPVSRS(n,b,base) REST_32FPRS(n,b,base)
+#define SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n,b,base)
#endif
/*
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index b56fecc93a16c6..9ef78498efaf9a 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -143,7 +143,11 @@ skpinv: addi r4,r4,1 /* Increment */
sync
/* Initialize MMUCR */
+#ifdef CONFIG_L1_WRITETHROUGH
+ lis r5,PPC44x_MMUCR_U2SWOAE@h
+#else
li r5,0
+#endif
mtspr SPRN_MMUCR,r5
sync
@@ -158,7 +162,12 @@ skpinv: addi r4,r4,1 /* Increment */
/* attrib fields */
/* Added guarded bit to protect against speculative loads/stores */
li r5,0
+#ifdef CONFIG_L1_WRITETHROUGH
+ ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M | PPC44x_TLB_U2)
+ oris r5,r5,PPC44x_TLB_WL1@h
+#else
ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+#endif
li r0,63 /* TLB slot 63 */
@@ -228,6 +237,14 @@ skpinv: addi r4,r4,1 /* Increment */
lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */
mtspr SPRN_IVPR,r4
+#ifdef CONFIG_SMP
+ /* are we an additional CPU? */
+ li r0, 0
+ mfspr r4, SPRN_PIR
+ cmpw r4, r0
+ bgt secondary_entry
+#endif
+
/*
* This is where the main kernel code starts.
*/
@@ -278,6 +295,70 @@ skpinv: addi r4,r4,1 /* Increment */
mtspr SPRN_SRR1,r3
rfi /* change context and jump to start_kernel */
+
+#ifdef CONFIG_SMP
+/* Extra cpus will come here. */
+//#define _GLOBAL_DEVINIT(n) \
+// .section .text.devinit; \
+// .text; \
+// .stabs __stringify(n:F-1),N_FUN,0,0,n;\
+// .globl n; \
+//n:
+//
+//_GLOBAL_DEVINIT(secondary_entry)
+secondary_entry:
+ /* Enable U2SWOA. U2 will be enabled in TLBs. */
+ lis r7,PPC44x_MMUCR_U2SWOAE@h
+ mtspr SPRN_MMUCR,r7
+ li r7,0
+ mtspr SPRN_PID,r7
+ sync
+ lis r8,KERNELBASE@h
+
+ /* The tlb_44x_hwater global var (setup by cpu#0) reveals how many
+ * 256M TLBs we need to map.
+ */
+ lis r9, tlb_44x_hwater@ha
+ lwz r9, tlb_44x_hwater@l(r9)
+
+ li r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M|PPC44x_TLB_U2)
+ oris r5, r5, PPC44x_TLB_WL1@h
+
+2: addi r9,r9,1
+ cmpwi r9,62 /* Stop at entry 62 which is the firmware */
+ beq 3f
+ addis r7,r7,0x1000 /* add 256M */
+ addis r8,r8,0x1000
+ ori r6,r8,PPC44x_TLB_VALID | PPC44x_TLB_256M
+
+ tlbwe r6,r9,PPC44x_TLB_PAGEID /* Load the pageid fields */
+ tlbwe r7,r9,PPC44x_TLB_XLAT /* Load the translation fields */
+ tlbwe r5,r9,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */
+ b 2b
+
+3: isync
+
+ /* Setup context from global var secondary_ti */
+ lis r1, secondary_ti@ha
+ lwz r1, secondary_ti@l(r1)
+ lwz r2, TI_TASK(r1) /* r2 = task_info */
+
+ addi r3,r2,THREAD /* init task's THREAD */
+ mtspr SPRN_SPRG3,r3
+
+ li r0,0
+ stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+
+ /* Let's move on */
+ lis r4,start_secondary@h
+ ori r4,r4,start_secondary@l
+ lis r3,MSR_KERNEL@h
+ ori r3,r3,MSR_KERNEL@l
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r3
+ rfi /* change context and jump to start_secondary */
+#endif
+
/*
* Interrupt vector entry code
*
@@ -588,7 +669,15 @@ finish_tlb_load:
andi. r10,r12,_PAGE_USER /* User page ? */
beq 1f /* nope, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
-1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
+1:
+#ifdef CONFIG_L1_WRITETHROUGH
+ andi. r10, r11, PPC44x_TLB_I
+ bne 2f
+ oris r11,r11,PPC44x_TLB_WL1@h /* Add coherency for non-inhibited */
+ ori r11,r11,PPC44x_TLB_SWOA|PPC44x_TLB_M
+2:
+#endif
+ tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
*/
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 15f28e0de78dae..7282d2f4d40110 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -491,6 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
*
* void clear_pages(void *page, int order) ;
*/
+#if !defined(CONFIG_WRAP_COPY_TOFROM_USER)
_GLOBAL(clear_pages)
li r0,PAGE_SIZE/L1_CACHE_BYTES
slw r0,r0,r4
@@ -502,12 +503,25 @@ _GLOBAL(clear_pages)
stw r4, 8(r3)
stw r4, 12(r3)
#else
+#ifdef CONFIG_L1_WRITETHROUGH
+ /* assuming 32 byte cacheline */
+ li r4, 0
+1: stw r4, 0(r3)
+ stw r4, 4(r3)
+ stw r4, 8(r3)
+ stw r4, 12(r3)
+ stw r4, 16(r3)
+ stw r4, 20(r3)
+ stw r4, 24(r3)
+ stw r4, 28(r3)
+#else
1: dcbz 0,r3
#endif
+#endif
addi r3,r3,L1_CACHE_BYTES
bdnz 1b
blr
-
+#endif
/*
* Copy a whole page. We use the dcbz instruction on the destination
* to reduce memory traffic (it eliminates the unnecessary reads of
@@ -524,6 +538,7 @@ _GLOBAL(clear_pages)
stw r8,12(r3); \
stwu r9,16(r3)
+#if !defined(CONFIG_WRAP_COPY_TOFROM_USER)
_GLOBAL(copy_page)
addi r3,r3,-4
addi r4,r4,-4
@@ -556,7 +571,9 @@ _GLOBAL(copy_page)
mtctr r0
1:
dcbt r11,r4
+#ifndef CONFIG_L1_WRITETHROUGH
dcbz r5,r3
+#endif
COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
COPY_16_BYTES
@@ -578,6 +595,7 @@ _GLOBAL(copy_page)
li r11,4
b 2b
#endif /* CONFIG_8xx */
+#endif /* CONFIG_WRAP_COPY_TOFROM_USER */
/*
* void atomic_clear_mask(atomic_t mask, atomic_t *addr)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 65484b2200b36a..e39ab04671c0f9 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -489,6 +489,14 @@ int __devinit start_secondary(void *unused)
current->active_mm = &init_mm;
smp_store_cpu_info(cpu);
+
+#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+ /* Clear any pending timer interrupts */
+ mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+
+ /* Enable decrementer interrupt */
+ mtspr(SPRN_TCR, TCR_DIE);
+#endif
set_dec(tb_ticks_per_jiffy);
preempt_disable();
cpu_callin_map[cpu] = 1;
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 8db35278a4b436..886a01bc6a65af 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
obj-y += code-patching.o
obj-y += feature-fixups.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
+obj-$(CONFIG_WRAP_COPY_TOFROM_USER) += copy_tofrom_user.o
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index c657de59abca8c..a774f03fb7b3cd 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -98,7 +98,7 @@ _GLOBAL(cacheable_memzero)
bdnz 4b
3: mtctr r9
li r7,4
-#if !defined(CONFIG_8xx)
+#if !defined(CONFIG_8xx) && !defined(CONFIG_L1_WRITETHROUGH)
10: dcbz r7,r6
#else
10: stw r4, 4(r6)
@@ -200,7 +200,7 @@ _GLOBAL(cacheable_memcpy)
mtctr r0
beq 63f
53:
-#if !defined(CONFIG_8xx)
+#if !defined(CONFIG_8xx) && !defined(CONFIG_L1_WRITETHROUGH)
dcbz r11,r6
#endif
COPY_16_BYTES
@@ -318,7 +318,11 @@ _GLOBAL(backwards_memcpy)
mtctr r7
b 1b
+#if defined(CONFIG_WRAP_COPY_TOFROM_USER)
+_GLOBAL(__real__copy_tofrom_user)
+#else
_GLOBAL(__copy_tofrom_user)
+#endif
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
@@ -391,7 +395,11 @@ _GLOBAL(__copy_tofrom_user)
mtctr r8
53: dcbt r3,r4
+#ifdef CONFIG_L1_WRITETHROUGH
+54:
+#else
54: dcbz r11,r6
+#endif
.section __ex_table,"a"
.align 2
.long 54b,105f
diff --git a/arch/powerpc/lib/copy_tofrom_user.c b/arch/powerpc/lib/copy_tofrom_user.c
new file mode 100644
index 00000000000000..525da59b188cff
--- /dev/null
+++ b/arch/powerpc/lib/copy_tofrom_user.c
@@ -0,0 +1,19 @@
+#include <linux/kernel.h>
+
+extern unsigned long __real__copy_tofrom_user(void *to,
+ const void __user *from, unsigned long size) ;
+
+#if defined(CONFIG_BGP_TORUS)
+extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void *to,
+ const void __user *from, unsigned long size) ;
+#endif
+
+unsigned long __copy_tofrom_user(void *to,
+ const void __user *from, unsigned long size)
+{
+#if defined(CONFIG_BGP_TORUS)
+ int rc=bgp_fpu_instrument_copy_tofrom_user(to, from, size) ;
+ if( 0 == rc) return 0 ;
+#endif
+ return __real__copy_tofrom_user(to, from, size) ;
+}
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 98052ac96580d6..dc1742a4b8ee25 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -64,6 +64,12 @@ static void __init ppc44x_update_tlb_hwater(void)
static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
{
unsigned int entry = tlb_44x_hwater--;
+ unsigned attrs = PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX;
+#ifdef CONFIG_L1_WRITETHROUGH
+ attrs |= PPC44x_TLB_WL1 | PPC44x_TLB_SWOA | PPC44x_TLB_M;
+#else
+ attrs |= PPC44x_TLB_G;
+#endif
ppc44x_update_tlb_hwater();
@@ -72,7 +78,7 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
"tlbwe %1,%3,%5\n"
"tlbwe %0,%3,%6\n"
:
- : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
+ : "r" (attrs),
"r" (phys),
"r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
"r" (entry),
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 953cc4a1cde560..c913931927c1ae 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -15,6 +15,8 @@ hash-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC64) += hash_utils_64.o \
slb_low.o slb.o stab.o \
gup.o mmap.o $(hash-y)
+
+obj-$(CONFIG_BGP) += mmap.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o
obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \
tlb_hash$(CONFIG_WORD_SIZE).o \
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 91c7b8636b8a75..573d8c4b221e13 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -101,6 +101,73 @@ static int store_updates_sp(struct pt_regs *regs)
return 0;
}
+#ifdef CONFIG_BGP
+/* The icbi instruction does not broadcast to all cpus in the ppc450 processor used
+ * by Blue Gene/P. It is unlikely this problem will be exhibited in other processors
+ * so this remains ifdef'ed for BGP specifically.
+ *
+ * We deal with this by marking executable pages either writable, or executable, but
+ * never both. The permissions will fault back and forth if the thread is actively
+ * writing to executable sections. Each time we fault to become executable we flush
+ * the dcache into icache on all cpus.
+ */
+struct bgp_fixup_parm {
+ struct page *page;
+ unsigned long address;
+ struct vm_area_struct *vma;
+};
+static void bgp_fixup_cache_tlb(void *parm)
+{
+ struct bgp_fixup_parm *p = parm;
+
+ if (!PageHighMem(p->page))
+ flush_dcache_icache_page(p->page);
+ local_flush_tlb_page(p->vma, p->address);
+}
+
+static void bgp_fixup_access_perms(struct vm_area_struct *vma,
+ unsigned long address,
+ int is_write, int is_exec)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pte_t *ptep = NULL;
+ pmd_t *pmdp;
+
+ if (get_pteptr(mm, address, &ptep, &pmdp)) {
+ spinlock_t *ptl = pte_lockptr(mm, pmdp);
+ pte_t old;
+
+ spin_lock(ptl);
+ old = *ptep;
+ if (pte_present(old)) {
+ struct page *page = pte_page(old);
+
+ if (is_exec) {
+ struct bgp_fixup_parm param = {
+ .page = page,
+ .address = address,
+ .vma = vma,
+ };
+ pte_update(ptep, _PAGE_HWWRITE, 0);
+ on_each_cpu(bgp_fixup_cache_tlb, &param, 1);
+ pte_update(ptep, 0, _PAGE_HWEXEC);
+ pte_unmap_unlock(ptep, ptl);
+ return;
+ }
+ if (is_write &&
+ (pte_val(old) & _PAGE_RW) &&
+ (pte_val(old) & _PAGE_DIRTY) &&
+ !(pte_val(old) & _PAGE_HWWRITE)) {
+ pte_update(ptep, _PAGE_HWEXEC, _PAGE_HWWRITE);
+ }
+ }
+ if (!pte_same(old, *ptep))
+ flush_tlb_page(vma, address);
+ pte_unmap_unlock(ptep, ptl);
+ }
+}
+#endif
+
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
* for a data fault, SRR1 for an instruction fault. For 400-family processors
@@ -266,6 +333,7 @@ good_area:
!(vma->vm_flags & (VM_READ | VM_WRITE))))
goto bad_area;
#else
+#ifndef CONFIG_BGP
pte_t *ptep;
pmd_t *pmdp;
@@ -292,6 +360,7 @@ good_area:
pte_unmap_unlock(ptep, ptl);
}
#endif
+#endif
/* a write */
} else if (is_write) {
if (!(vma->vm_flags & VM_WRITE))
@@ -330,6 +399,12 @@ good_area:
#endif
} else
current->min_flt++;
+
+#ifdef CONFIG_BGP
+ /* Fixup _PAGE_HWEXEC and _PAGE_HWWRITE if necessary */
+ bgp_fixup_access_perms(vma, address, is_write, is_exec);
+#endif
+
up_read(&mm->mmap_sem);
return 0;
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 86010fc7d3b17a..2db81290964be7 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -48,11 +48,13 @@ static inline unsigned long mmap_base(void)
static inline int mmap_is_legacy(void)
{
+#if defined(CONFIG_64BIT)
/*
* Force standard allocation for 64 bit programs.
*/
if (!test_thread_flag(TIF_32BIT))
return 1;
+#endif
if (current->personality & ADDR_COMPAT_LAYOUT)
return 1;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 52a0cfc38b6488..ba043c4c564e0f 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -46,7 +46,7 @@ static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
static unsigned long *stale_map[NR_CPUS];
static struct mm_struct **context_mm;
-static spinlock_t context_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(context_lock);
#define CTX_MAP_SIZE \
(sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
@@ -73,7 +73,6 @@ static unsigned int steal_context_smp(unsigned int id)
struct mm_struct *mm;
unsigned int cpu, max;
- again:
max = last_context - first_context;
/* Attempt to free next_context first and then loop until we manage */
@@ -108,7 +107,9 @@ static unsigned int steal_context_smp(unsigned int id)
spin_unlock(&context_lock);
cpu_relax();
spin_lock(&context_lock);
- goto again;
+
+ /* This will cause the caller to try again */
+ return MMU_NO_CONTEXT;
}
#endif /* CONFIG_SMP */
@@ -127,12 +128,12 @@ static unsigned int steal_context_up(unsigned int id)
pr_debug("[%d] steal context %d from mm @%p\n", cpu, id, mm);
- /* Mark this mm has having no context anymore */
- mm->context.id = MMU_NO_CONTEXT;
-
/* Flush the TLB for that context */
local_flush_tlb_mm(mm);
+ /* Mark this mm as having no context anymore */
+ mm->context.id = MMU_NO_CONTEXT;
+
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
__clear_bit(id, stale_map[cpu]);
@@ -194,6 +195,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
WARN_ON(prev->context.active < 1);
prev->context.active--;
}
+
+ again:
#endif /* CONFIG_SMP */
/* If we already have a valid assigned context, skip all that */
@@ -212,6 +215,8 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
#ifdef CONFIG_SMP
if (num_online_cpus() > 1) {
id = steal_context_smp(id);
+ if (id == MMU_NO_CONTEXT)
+ goto again;
goto stolen;
}
#endif /* CONFIG_SMP */
@@ -272,6 +277,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm)
*/
void destroy_context(struct mm_struct *mm)
{
+ unsigned long flags;
unsigned int id;
if (mm->context.id == MMU_NO_CONTEXT)
@@ -279,18 +285,18 @@ void destroy_context(struct mm_struct *mm)
WARN_ON(mm->context.active != 0);
- spin_lock(&context_lock);
+ spin_lock_irqsave(&context_lock, flags);
id = mm->context.id;
if (id != MMU_NO_CONTEXT) {
__clear_bit(id, context_map);
mm->context.id = MMU_NO_CONTEXT;
#ifdef DEBUG_MAP_CONSISTENCY
mm->context.active = 0;
- context_mm[id] = NULL;
#endif
+ context_mm[id] = NULL;
nr_free_contexts++;
}
- spin_unlock(&context_lock);
+ spin_unlock_irqrestore(&context_lock, flags);
}
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 3496bc05058ed6..f08863a5bd9805 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -2,7 +2,6 @@ config BAMBOO
bool "Bamboo"
depends on 44x
default n
- select PPC44x_SIMPLE
select 440EP
select PCI
help
@@ -31,7 +30,6 @@ config SEQUOIA
bool "Sequoia"
depends on 44x
default n
- select PPC44x_SIMPLE
select 440EPX
help
This option enables support for the AMCC PPC440EPX evaluation board.
@@ -40,7 +38,6 @@ config TAISHAN
bool "Taishan"
depends on 44x
default n
- select PPC44x_SIMPLE
select 440GX
select PCI
help
@@ -51,7 +48,6 @@ config KATMAI
bool "Katmai"
depends on 44x
default n
- select PPC44x_SIMPLE
select 440SPe
select PCI
select PPC4xx_PCI_EXPRESS
@@ -62,7 +58,6 @@ config RAINIER
bool "Rainier"
depends on 44x
default n
- select PPC44x_SIMPLE
select 440GRX
select PCI
help
@@ -81,53 +76,34 @@ config WARP
See http://www.pikatechnologies.com/ and follow the "PIKA for Computer
Telephony Developers" link for more information.
-config ARCHES
- bool "Arches"
- depends on 44x
- default n
- select PPC44x_SIMPLE
- select 460EX # Odd since it uses 460GT but the effects are the same
- select PCI
- select PPC4xx_PCI_EXPRESS
- help
- This option enables support for the AMCC Dual PPC460GT evaluation board.
-
config CANYONLANDS
bool "Canyonlands"
depends on 44x
default n
- select PPC44x_SIMPLE
select 460EX
select PCI
select PPC4xx_PCI_EXPRESS
- select IBM_NEW_EMAC_RGMII
- select IBM_NEW_EMAC_ZMII
help
This option enables support for the AMCC PPC460EX evaluation board.
-config GLACIER
- bool "Glacier"
- depends on 44x
- default n
- select PPC44x_SIMPLE
- select 460EX # Odd since it uses 460GT but the effects are the same
- select PCI
- select PPC4xx_PCI_EXPRESS
- select IBM_NEW_EMAC_RGMII
- select IBM_NEW_EMAC_ZMII
- help
- This option enables support for the AMCC PPC460GT evaluation board.
-
config YOSEMITE
bool "Yosemite"
depends on 44x
default n
- select PPC44x_SIMPLE
select 440EP
select PCI
help
This option enables support for the AMCC PPC440EP evaluation board.
+config BGP
+ bool "Blue Gene/P"
+ depends on 44x
+ default y
+ select BLUEGENE
+ help
+ This option enables support for the IBM Blue Gene/P supercomputer.
+
+
#config LUAN
# bool "Luan"
# depends on 44x
@@ -160,21 +136,6 @@ config XILINX_VIRTEX440_GENERIC_BOARD
Most Virtex 5 designs should use this unless it needs to do some
special configuration at board probe time.
-config PPC44x_SIMPLE
- bool "Simple PowerPC 44x board support"
- depends on 44x
- default n
- help
- This option enables the simple PowerPC 44x platform support.
-
-config PPC4xx_GPIO
- bool "PPC4xx GPIO support"
- depends on 44x
- select ARCH_REQUIRE_GPIOLIB
- select GENERIC_GPIO
- help
- Enable gpiolib support for ppc440 based boards
-
# 44x specific CPU modules, selected based on the board above.
config 440EP
bool
@@ -218,6 +179,8 @@ config 460EX
bool
select PPC_FPU
select IBM_NEW_EMAC_EMAC4
+ select IBM_NEW_EMAC_RGMII
+ select IBM_NEW_EMAC_ZMII
select IBM_NEW_EMAC_TAH
# 44x errata/workaround config symbols, selected by the CPU models above
@@ -233,3 +196,185 @@ config XILINX_VIRTEX_5_FXT
bool
select XILINX_VIRTEX
+config BLUEGENE
+ bool
+ select PPC_FPU
+ select PPC_DOUBLE_FPU
+
+config BLUEGENE_NOISY_BOOT
+ bool "Send Blue Gene boot messages to the control system"
+ depends on BLUEGENE
+ default n
+ help
+ Select this if you need to diagnose faults with the IO or Compute node kernel boot.
+
+config BLUEGENE_MAMBO
+ bool "Run on Blue Gene/P Mambo Simulator"
+ depends on BGP
+
+config L1_WRITETHROUGH
+ bool "Blue Gene enable writethrough mode"
+ depends on BLUEGENE
+ default n
+
+config BGP_DD1
+ bool "Blue Gene enable workarounds for BG/P DD1"
+ default n
+
+config BLUEGENE_TCP
+ bool "Blue Gene/P TCP on Torus"
+ default y if BGP
+
+config BLUEGENE_DMA_MEMCPY
+ bool "Blue Gene copy_tofrom_user optimisation with the torus DMA unit"
+ depends on BLUEGENE
+ default n
+ help
+ 'copyin/out' via the BGP DMA is believed functional, but seems not useful since copying via the parallel FP regs
+ seems to run faster, even in cases where that wipes out the L1 cache. Code is left here in case someone wants to
+ try improving it, and to indicate which sections of the BGP DMA unit (injection fifo and reception counters) are needed
+ to make it work.
+
+
+config BLUEGENE_COLLECTIVE_TRACE
+ bool "Activate diagnostic trace in BlueGene/P collective network"
+ default y if BGP
+
+config BLUEGENE_TORUS_TRACE
+ bool "Activate diagnostic trace in BlueGene/P torus network"
+ default y if BGP
+
+
+config BLUEGENE_TCP_WITHOUT_NAPI
+ bool "Blue Gene/P TCP interrupt every packet (no NAPI) for debugging"
+ default n
+
+config BLUEGENE_UNIPROCESSOR
+ bool "Force BlueGene to run uniprocessor (450 debugging, or vrnic)"
+ depends on BLUEGENE
+ default n
+
+config BLUEGENE_STATISTICS
+ bool "Maintain statistics related to BlueGene networking"
+ depends on BLUEGENE
+ default y
+
+config BLUEGENE_SHARE_WITH_VRNIC
+ bool "Allow vRNIC to map all of Linux memory"
+ depends on BLUEGENE
+ default n
+
+config HUGE_KMALLOC
+ bool "Allow for 32MB kmalloc blocks"
+ default y if BGP
+
+config TASK_UNMAPPED_BASE
+ hex "Base virtual address for mmap"
+ depends on BGP
+ default "0x20000000"
+ help
+ processor.h will set this to (TASK_SIZE / 8 * 3) if you do not set it here
+
+config DEBUG_ALIGNMENT_HISTOGRAM
+ bool "copy_tofrom_user alignment histograms"
+ default y
+ help
+ Enables maintenance of alignment histograms for copy_tofrom_user and similar functions,
+ to explore whether alternative implementations might be useful for performance.
+
+config DEBUG_STACK_USAGE
+ bool "Stack utilization instrumentation"
+ default y if BGP
+ help
+ Enables the display of the minimum amount of free stack which each
+ task has ever had available in the sysrq-T and sysrq-P debug output.
+
+ This option will slow down process creation somewhat.
+
+config BOOKE
+ bool
+ depends on 44x
+ default y
+
+config IBM_OCP
+ bool
+ depends on ASH || BAMBOO || BLUEGENE || BUBINGA || CPCI405 || EBONY || EP405 || LUAN || YUCCA || OCOTEA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT
+ default y
+
+config IBM_EMAC4
+ bool
+ depends on 440GX || 440SP || 440SPE || BLUEGENE
+ default y
+
+config 405EP
+ bool
+ depends on BUBINGA
+ default y
+
+# Some of the items below might not be quite right; I'm putting part of the 2.6.19 Kconfig in here, enough
+# to get a BGP build working. tjcw.
+config 405GP
+ bool
+ depends on CPCI405 || EP405 || WALNUT
+ default y
+
+config 405GPR
+ bool
+ depends on SYCAMORE
+ default y
+
+config STB03xxx
+ bool
+ depends on REDWOOD_5 || REDWOOD_6
+ default y
+
+config EMBEDDEDBOOT
+ bool
+ depends on EP405 || XILINX_ML300 || XILINX_ML403
+ default y
+
+config IBM_OPENBIOS
+ bool
+ depends on ASH || REDWOOD_5 || REDWOOD_6
+ default y
+
+config PPC4xx_DMA
+ bool "PPC4xx DMA controller support"
+ depends on 4xx
+
+config PPC4xx_EDMA
+ bool
+ depends on !STB03xxx && PPC4xx_DMA
+ default y
+
+config PPC_GEN550
+ bool
+ depends on 4xx
+ default y
+
+choice
+ prompt "TTYS0 device and default console"
+ depends on 40x
+ default UART0_TTYS0
+
+config UART0_TTYS0
+ bool "UART0"
+
+config UART0_TTYS1
+ bool "UART1"
+
+endchoice
+
+config SERIAL_SICC
+ bool "SICC Serial port support"
+ depends on STB03xxx
+
+config UART1_DFLT_CONSOLE
+ bool
+ depends on SERIAL_SICC && UART0_TTYS1
+ default y
+
+config SERIAL_SICC_CONSOLE
+ bool
+ depends on SERIAL_SICC && UART0_TTYS1
+ default y
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 01f51daace1394..3596f55f80f904 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_EBONY) += ebony.o
obj-$(CONFIG_SAM440EP) += sam440ep.o
obj-$(CONFIG_WARP) += warp.o
obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o
+obj-$(CONFIG_BGP) += bgp_cns.o bgp_bic.o bgp.o bgp_pers.o
diff --git a/arch/powerpc/platforms/44x/bgp.c b/arch/powerpc/platforms/44x/bgp.c
new file mode 100644
index 00000000000000..c180a447d0450d
--- /dev/null
+++ b/arch/powerpc/platforms/44x/bgp.c
@@ -0,0 +1,205 @@
+/*
+ * Blue Gene/P board specific routines
+ *
+ * Todd Inglett <tinglett@us.ibm.com>
+ * Copyright 2003-2009 International Business Machines, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <linux/root_dev.h>
+#include <linux/delay.h>
+
+#include <asm/machdep.h>
+#include <asm/prom.h>
+#include <asm/time.h>
+#include <asm/ppc4xx.h>
+#include <asm/mmu-44x.h>
+#include <asm/smp.h>
+#include <asm/cacheflush.h>
+#include <asm/bluegene.h>
+#include <asm/udbg.h>
+#include <asm/bluegene_ras.h>
+
+
+extern int bgWriteRasStr(unsigned int component,
+ unsigned int subcomponent,
+ unsigned int errCode,
+ char* str,
+ unsigned int strLen);
+extern int bgFlushOutboxMsgs(void);
+
+/*
+ * bgp_probe() is called very early; cpu 0 only
+ * one pinned TLB
+ * device-tree isn't unflattened
+ * Look to see if the boot wrapper says we are a Blue Gene/P.
+ * Setup udbg_putc, but it will do nothing until the CNS interface is initialized.
+ */
+static int __init bgp_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (!of_flat_dt_is_compatible(root, "ibm,bluegenep"))
+ return 0;
+
+ udbg_putc = bgp_udbg_putc;
+ return 1;
+}
+
+/*
+ * There isn't a concept of a kernel asking to be rebooted on Blue Gene.
+ * The restart, power_off and halt functions should produce RAS to tell the control
+ * system this node is no longer functional.
+ */
+static void bgp_halt(void)
+{
+ bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_halted, "System Halted", 0);
+
+ // Flush halt RAS and any other buffered outbox messages.
+ while (bgFlushOutboxMsgs());
+}
+
+static void bgp_panic(char *str)
+{
+ bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_panic, str, 0);
+
+ // Flush panic RAS and any other buffered outbox messages.
+ while (bgFlushOutboxMsgs());
+}
+
+/* Blue Gene is given the decrementor frequency via the device tree (personality). */
+static void __init bgp_calibrate_decr(void)
+{
+ struct device_node *pernode = of_find_node_by_path("/ibm,bluegene/personality");
+
+ ppc_tb_freq = 0;
+ if (pernode) {
+ int len;
+ const unsigned *reg = of_get_property(pernode, "frequency", &len);
+ if (reg)
+ ppc_tb_freq = *reg;
+ }
+ if (ppc_tb_freq == 0) {
+ udbg_printf("personality/frequency device-tree field not found!\n");
+ ppc_tb_freq = 850000000; /* A very good default */
+ }
+
+ ppc_proc_freq = ppc_tb_freq;
+ mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
+ mtspr(SPRN_TCR, TCR_DIE);
+}
+
+/* Generic 44x init disables icache prefetch which can be enabled. */
+static void __init bgp_enable_icache_prefetch(void)
+{
+ mtspr(SPRN_CCR0, mfspr(SPRN_CCR0)|2);
+ isync();
+ mb();
+}
+
+#ifdef CONFIG_SMP
+/*
+ * The Blue Gene interrupt controller (in bgp_bic.c) can implement
+ * sending IPIs with a cpumask. Consider changing this interface.
+ */
+static void smp_bluegene_message_pass(int target, int msg)
+{
+ unsigned int i;
+
+ if (target < NR_CPUS) {
+ bgp_send_ipi(target, msg);
+ } else {
+ for_each_online_cpu(i) {
+ if (target == MSG_ALL_BUT_SELF
+ && i == smp_processor_id())
+ continue;
+ bgp_send_ipi(i, msg);
+ }
+ }
+}
+
+
+/* Return number of cpus possible in the system.
+ * We wire this to 4 even though it may disagree with NR_CPUS.
+ *
+ * Also a good time to register the IPI interrupt handlers.
+ * The cpu_present_map was already setup via setup_arch, so we use it.
+ */
+static int smp_bluegene_probe(void)
+{
+ return cpus_weight(cpu_possible_map);
+}
+
+/*
+ * Start a cpu by calling firmware.
+ */
+static void smp_bluegene_kick_cpu(int cpu)
+{
+ int ret = bluegene_takeCPU(cpu, 0, (void (*)(unsigned, void *))4);
+ if (ret == 0) {
+ cpu_set(cpu, cpu_present_map);
+ } else {
+ udbg_printf("CPU %d is not available (firmware returns %d)\n", cpu, ret);
+ }
+}
+
+/*
+ * Each secondary cpu needs some initialization.
+ */
+static void __init smp_bluegene_setup_cpu(int nr)
+{
+ int cpu = smp_processor_id();
+
+ flush_instruction_cache();
+ bgp_enable_icache_prefetch();
+ bgp_init_cns(); /* map CNS for this cpu */
+
+ bgp_init_IPI(cpu, PPC_MSG_CALL_FUNCTION);
+ bgp_init_IPI(cpu, PPC_MSG_RESCHEDULE);
+ bgp_init_IPI(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+ bgp_init_IPI(cpu, PPC_MSG_DEBUGGER_BREAK);
+}
+
+static struct smp_ops_t bluegene_smp_ops = {
+ .message_pass = smp_bluegene_message_pass,
+ .probe = smp_bluegene_probe,
+ .kick_cpu = smp_bluegene_kick_cpu,
+ .setup_cpu = smp_bluegene_setup_cpu,
+};
+#endif
+
+/*
+ * Initialize CNS (Common Node Services) in bgp_cns.c.
+ * Once we have initialized CNS, we can crudely print messages with
+ * udbg_printf().
+ */
+static void __init bgp_setup_arch(void)
+{
+ ROOT_DEV = Root_RAM0;
+
+ bgp_enable_icache_prefetch();
+ bgp_init_cns();
+#ifdef CONFIG_SMP
+ smp_ops = &bluegene_smp_ops;
+#endif
+}
+
+define_machine(bgp) {
+ .name = "bgp",
+ .probe = bgp_probe,
+ .setup_arch = bgp_setup_arch,
+ .init_IRQ = bgp_init_IRQ,
+ .get_irq = bgp_get_irq,
+ .restart = (void (*)(char *))bgp_halt,
+ .power_off = bgp_halt,
+ .halt = bgp_halt,
+ .panic = bgp_panic,
+ .calibrate_decr = bgp_calibrate_decr,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/44x/bgp_bic.c b/arch/powerpc/platforms/44x/bgp_bic.c
new file mode 100644
index 00000000000000..1d5e029b297259
--- /dev/null
+++ b/arch/powerpc/platforms/44x/bgp_bic.c
@@ -0,0 +1,675 @@
+/*
+ * Blue Gene/P interrupt controller
+ *
+ * Linux wants IRQs mapped to a small integer space.
+ *
+ * The bic defines 15 groups and 32 group interrupts in each group.
+ * We encode an IRQ number like this (which requires NR_IRQS=512):
+ * GGGGIIIII
+ * where GGGG is the 4-bit group number+1 (i.e. GGGG=0000 is not used),
+ * and IIIII is the 5-bit interrupt index within the 32-bit word.
+ * The interrupt indexes are numbered from the left bit (powerpc-style).
+ * We avoid encoding GGGG=0000 so we never end up with an IRQ=0 which is a
+ * flag for "no interrupt" in arch/powerpc.
+ *
+ * The IPIs subdivide the group 0 interrupt word as follows:
+ *
+ * CRSD CRSD CRSD CRSD .... .... .... ....
+ * 0 4 8 12 16 20 24 28
+ * cpu0 cpu1 cpu2 cpu3
+
+ * where C=call, R=resched, S=call-single, D=debug, and .=unused
+ *
+ * We encode IPI IRQ numbers specially. By the above encoding they would be
+ * 32..47 for these 16 bits.
+ *
+ * The other 16 bits in group 0 are treated normally. These will translate to
+ * IRQ = 48..63 and can be used by software to simulate hardware interrupts for
+ * other purposes.
+ *
+ *
+ * Todd Inglett <tinglett@us.ibm.com>
+ * Copyright 2003-2009 International Business Machines, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <asm/bluegene.h>
+
+/* #define TJCW_USE_BYTEWISE */
+/* #define BIC_DIAGNOSE 1 */
+
+#if defined(BIC_DIAGNOSE)
+extern int bgp_dma_tcp_tracemask ;
+static int bic_diagnose_count ;
+enum {
+ k_bic_diagnose_limit = 100
+};
+static unsigned int bic_diagnosing(void)
+{
+ if( 0 == (bgp_dma_tcp_tracemask & 0x80000000) )
+ {
+ if( bic_diagnose_count < k_bic_diagnose_limit)
+ {
+ bic_diagnose_count += 1 ;
+ return 1 ;
+ }
+ }
+ else
+ {
+ bic_diagnose_count = 0 ;
+ }
+ return 0 ;
+}
+#define BIC_DIAG(X) if(bic_diagnosing()) { X ; }
+#else
+#define BIC_DIAG(X)
+#endif
+
+void bic_unmask_irq(unsigned int irq);
+EXPORT_SYMBOL(bic_unmask_irq) ;
+static void bic_mask_irq(unsigned int irq);
+static void bic_eoi_irq(unsigned int irq);
+
+static void bic_unmask_irq_bytewise(unsigned int irq) __attribute__((unused)) ;
+
+static void bic_mask_irq_bytewise(unsigned int irq) __attribute__((unused)) ;
+#if defined(TJCW_USE_BYTEWISE)
+static struct irq_chip bgp_irq_chip = {
+ .name = "BIC",
+ .unmask = bic_unmask_irq_bytewise,
+ .mask = bic_mask_irq_bytewise,
+ .eoi = bic_eoi_irq,
+};
+#else
+static struct irq_chip bgp_irq_chip = {
+ .name = "BIC",
+ .unmask = bic_unmask_irq,
+ .mask = bic_mask_irq,
+ .eoi = bic_eoi_irq,
+};
+#endif
+
+
+/* Note that the BIC (and other devices) are at phys addresses > 4GB */
+#define BIC_PHYS 0x730000000LL
+
+/* These are defined by the hardware. */
+#define NR_BIC_GROUPS 15
+#define NR_BIC_GINTS 32
+#define NR_BIC_CPUS 4
+
+/* 4-bit target value for target register */
+#define BIC_TARGET_MASK (0xf)
+#define BIC_TARGET_TYPE_NORMAL (1<<2)
+#define BIC_TARGET_NORMAL(cpu) (BIC_TARGET_TYPE_NORMAL|(cpu))
+#define BIC_DEFAULT_CPU 0
+#define BIC_IPI_GROUP 0
+
+/* Define the layout of each group's registers.
+ * This layout should be 0x80 bytes long (including pad).
+ */
+struct bic_group_regs {
+ uint32_t status; /* 0x00 RW */
+ uint32_t rd_clr_status; /* 0x04 RO */
+ uint32_t status_clr; /* 0x08 WO */
+ uint32_t status_set; /* 0x0c WO */
+ uint32_t target[4]; /* 0x10 RW */
+ uint32_t normal[NR_BIC_CPUS]; /* 0x20 RW */
+ uint32_t critical[NR_BIC_CPUS]; /* 0x30 RW */
+ uint32_t mcheck[NR_BIC_CPUS]; /* 0x40 RW */
+ uint32_t _pad[12]; /* 0x50 */
+};
+
+/* Define the layout of the interrupt controller mem mapped regs. */
+struct bic_regs {
+ struct bic_group_regs group[NR_BIC_GROUPS]; /* 0x000 */
+ uint32_t hier_normal[NR_BIC_CPUS]; /* 0x780 */
+ uint32_t hier_critical[NR_BIC_CPUS]; /* 0x790 */
+ uint32_t hier_mcheck[NR_BIC_CPUS]; /* 0x7a0 */
+};
+
+/* This table is indexed by 'real' IRQ, i.e. BIC values. Linux 'virtual' IRQs are +32 */
+static volatile unsigned char intended_cpu_for_irq[NR_BIC_GROUPS*NR_BIC_GINTS] =
+ {
+/* 0 */
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1),BIC_TARGET_NORMAL(1),
+ BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2),BIC_TARGET_NORMAL(2),
+ BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3),BIC_TARGET_NORMAL(3),
+
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+/* 32 */
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+/* 64 */
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+/* 128 */
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+/* 256 */
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),
+ BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0),BIC_TARGET_NORMAL(0)
+/* 480 */
+
+ };
+
+/* 8-bit store to a memory-mapped BIC target register.
+ * The access is made through a volatile pointer so the compiler cannot
+ * reorder, merge or elide the store (the plain '*target = val' could be).
+ * NOTE(review): unlike the out_be32 accessor there is no explicit sync
+ * barrier here; callers re-read via in_be8/get_tcpu_* to confirm -- verify
+ * that is sufficient on this core.
+ */
+static inline void out_be8(unsigned char * target, unsigned int val)
+{
+ *(volatile unsigned char *)target = val ;
+}
+
+/* 8-bit load from a memory-mapped BIC target register.
+ * Volatile-qualified so each call performs a real device read instead of
+ * possibly reusing a cached value (important for the write-verify loops
+ * in bic_unmask_irq_bytewise).
+ */
+static inline unsigned int in_be8(unsigned char * target)
+{
+ return *(volatile unsigned char *)target ;
+}
+
+/* Group is encoded in the upper 4 bits of the irq number. Hardware groups
+ * count from 0 while irq numbers carry group+1, hence the subtraction. */
+static inline unsigned bic_irq_to_hwgroup(unsigned irq)
+{
+ unsigned encoded_group = (irq >> 5) & 0xf;
+ return encoded_group - 1;
+}
+/* Gint is the interrupt's index within its group: the bottom 5 bits. */
+static inline unsigned bic_irq_to_hwgint(unsigned irq)
+{
+ return irq % 32;
+}
+
+/* Linear hardware irq index. Linux irq numbers start at 32 because the
+ * group field encodes group+1; subtract that offset. */
+static inline unsigned bic_irq_to_hwirq(unsigned irq)
+{
+ return irq - 32;
+}
+
+/* bic_hw_to_irq(unsigned group, unsigned gint) is in bluegene.h */
+/* Need to keep a track in memory of where each interrupt is pointed at
+ * so we can reassemble the right hardware register contents even with SMP behaviour
+ */
+static volatile unsigned char cpu_for_irq[NR_BIC_GROUPS*NR_BIC_GINTS] ;
+/* Record the target cpu nibble for one hardware irq in the shadow array.
+ * Bound-check the index: callers compute hwirq = irq - 32, which wraps to
+ * a huge unsigned value for irq < 32 and previously caused an
+ * out-of-bounds write here. Bad indices are silently ignored. */
+static void set_cpu_for_hwirq(unsigned int hwirq, unsigned int tcpu)
+ {
+ if( hwirq < NR_BIC_GROUPS*NR_BIC_GINTS )
+ {
+ cpu_for_irq[hwirq] = tcpu ;
+ }
+ }
+
+/* Route 'irq' to 'cpu' by recording the intent; the hardware register is
+ * (re)written on the next unmask. */
+void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu)
+ {
+ unsigned int hwirq=bic_irq_to_hwirq(irq) ;
+ /* Bound-check the array index (hwirq), not irq: for irq < 32 the
+  * unsigned subtraction in bic_irq_to_hwirq wraps, and the old test
+  * 'irq < NR_BIC_GROUPS*NR_BIC_GINTS' still passed, allowing an
+  * out-of-bounds write into intended_cpu_for_irq. */
+ if( hwirq < NR_BIC_GROUPS*NR_BIC_GINTS )
+ {
+ intended_cpu_for_irq[hwirq] = BIC_TARGET_NORMAL(cpu) ;
+ }
+ BIC_DIAG(printk(KERN_INFO "bic_set_cpu_for_irq irq=0x%02x cpu=%d hwirq=0x%02x\n",
+ irq,cpu,hwirq)) ;
+ }
+
+/* Stop the BIC from passing an interrupt to the CPU. The idea is to */
+/* call this in a FLIH if you don't want a 'reinterrupt', and call */
+/* 'bic_set_cpu_for_irq' later on (e.g. from a NAPI 'poll') */
+void bic_disable_irq(unsigned int irq)
+ {
+ unsigned int hwirq = bic_irq_to_hwirq(irq) ;
+ /* Check the derived index, not irq: irq < 32 wraps hwirq to a huge
+  * value which the old 'irq < ...' test did not catch (OOB write). */
+ if( hwirq < NR_BIC_GROUPS*NR_BIC_GINTS )
+ {
+ intended_cpu_for_irq[hwirq] = 0 ;
+ }
+ }
+
+EXPORT_SYMBOL(bic_disable_irq) ;
+
+/* Report the recorded target for 'irq' (0 == disabled).
+ * Out-of-range irq numbers (including irq < 32, whose hwirq wraps) now
+ * report 'disabled' instead of performing an out-of-bounds read. */
+int bic_get_cpu_for_irq(unsigned int irq)
+ {
+ unsigned int hwirq = bic_irq_to_hwirq(irq) ;
+ if( hwirq < NR_BIC_GROUPS*NR_BIC_GINTS )
+ {
+ return intended_cpu_for_irq[hwirq] ;
+ }
+ return 0 ;
+ }
+
+
+/* Global BIC state: the ioremapped register block plus a software copy of
+ * which interrupts are enabled per group. The software mask is needed
+ * because the hardware status register reports bits that are not enabled
+ * as interrupts; bgp_get_irq filters status through enabled_mask. */
+struct bic {
+ spinlock_t mask_lock; /* could be finer grained if necessary */
+ struct bic_regs *regs;
+ uint32_t enabled_mask[NR_BIC_GROUPS] ; /* Hardware can report status even if a bit doesn't cause interrupt. This to mask off ... */
+} bic;
+
+
+/* ipi_to_irq(cpu, func)
+ * Map a (cpu, function) pair onto its Linux IRQ number inside the IPI
+ * group. The caller guarantees cpu in 0..3 and func in 0..3, so each cpu
+ * owns four consecutive gints.
+ */
+static inline unsigned ipi_to_irq(unsigned cpu, unsigned func)
+{
+ unsigned gint = (cpu << 2) + func;
+ return bic_hw_to_irq(BIC_IPI_GROUP, gint);
+}
+/* Build the status mask for this cpu: its own four IPI bits (numbered
+ * from the left) plus the low 16 non-IPI bits, which are never filtered. */
+static inline unsigned ipi_mask(unsigned cpu)
+{
+ unsigned own_ipi_bits = 0xf0000000U >> (4 * cpu);
+ return own_ipi_bits | 0x0000ffffU;
+}
+/* Given a gint known to be an IPI (0..15), return the cpu it targets.
+ * Each cpu owns a block of four gints; bits are numbered from the left. */
+static inline unsigned ipi_gint_cpu(unsigned gint)
+{
+ return (gint & 0x0f) >> 2;
+}
+/* True when (group, gint) addresses one of the 16 IPI bits in group 0. */
+static inline int is_ipi(unsigned group, unsigned gint)
+{
+ if (group != 0)
+ return 0;
+ return gint < 16;
+}
+
+#define GINT_TO_IRQ(group, gint) (((group) << 5) | (gint))
+/* Rebuild the 32-bit target-register value for word 'tnum' of 'group'
+ * from the in-memory shadow: eight 4-bit nibbles, with the lowest gint in
+ * the leftmost nibble (hardware numbers bits from the left). */
+static unsigned int get_tcpu_for_tnum(unsigned int group, unsigned int tnum)
+ {
+ unsigned int rbase = GINT_TO_IRQ(group,(tnum<<3)) ;
+ unsigned int word = 0 ;
+ unsigned int slot ;
+ for( slot = 0 ; slot < 8 ; slot += 1 )
+ {
+ word = (word << 4) | (cpu_for_irq[rbase+slot] & 0x0f) ;
+ }
+ return word ;
+ }
+/* Byte-wide variant: rebuild one target byte (two 4-bit nibbles) for byte
+ * 'tnum' of 'group' from the in-memory shadow. */
+static unsigned int get_tcpu_for_tnum_byte(unsigned int group, unsigned int tnum)
+ {
+ unsigned int rbase = GINT_TO_IRQ(group,(tnum<<1)) ;
+ unsigned int hi = cpu_for_irq[rbase] & 0x0f ;
+ unsigned int lo = cpu_for_irq[rbase+1] & 0x0f ;
+ return (hi << 4) | lo ;
+ }
+/*
+ * Unmasking an IRQ will enable it.
+ * We reach into the bic to set the target core of the interrupt appropriately.
+ * For now, interrupts are wired to a default core, although IPIs (of course)
+ * must be directed appropriately.
+ *
+ * Each 32-bit target register packs 8 interrupts as 4-bit nibbles;
+ * tnum selects the register word, tidx the nibble within it. Rather than
+ * read-modify-write the device register, the whole word is rebuilt from
+ * the in-memory shadow (cpu_for_irq) and written, then read back from the
+ * shadow and rewritten until stable, to tolerate concurrent updaters.
+ */
+void bic_unmask_irq(unsigned int irq)
+{
+ unsigned group = bic_irq_to_hwgroup(irq);
+ unsigned gint = bic_irq_to_hwgint(irq);
+ unsigned tnum = gint >> 3;
+ unsigned tidx = gint & 7;
+/* unsigned orig, tmask, tcpu; */
+ unsigned tmask, tcpu;
+ uint32_t *targetp = &bic.regs->group[group].target[tnum];
+ unsigned cpu;
+ unsigned int request_tcpu ;
+ unsigned int verify_tcpu ;
+
+ /* Record in software that this interrupt is enabled; bgp_get_irq
+  * filters status bits through enabled_mask. */
+ spin_lock(&bic.mask_lock);
+ bic.enabled_mask[group] |= 0x80000000 >> gint ; /* Note that this interrupt is enabled */
+ spin_unlock(&bic.mask_lock);
+
+ tmask= ~(0xf << (7-tidx)*4);
+
+ if (group == 0 /*is_ipi(group, gint)*/) {
+ /* These bits are magic. We know they are for IPIs
+ * and must direct them to the correct core.
+ */
+ cpu = ipi_gint_cpu(gint);
+ tcpu = BIC_TARGET_NORMAL(cpu) << (7-tidx)*4;
+ } else {
+ cpu = BIC_DEFAULT_CPU;
+ tcpu = BIC_TARGET_NORMAL(cpu) << (7-tidx)*4;
+ }
+
+
+ {
+ unsigned int hwirq = bic_irq_to_hwirq(irq) ;
+ /* NOTE(review): hwirq indexes intended_cpu_for_irq without a range
+  * check here; irq < 32 would wrap -- confirm callers only pass
+  * valid BIC irq numbers. */
+ unsigned int tgtcpu=intended_cpu_for_irq[hwirq] ; /* Note .. 'cpu' has the b'0100' bit set already if appropriate */
+ set_cpu_for_hwirq(hwirq,tgtcpu) ;
+ request_tcpu=get_tcpu_for_tnum(group,tnum) ;
+/* BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d targtval=0x%08x request_tcpy=0x%08x\n", */
+/* irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu,(orig & tmask)|tcpu, request_tcpu)) ; */
+ BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d request_tcpy=0x%08x\n",
+ irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu, request_tcpu)) ;
+
+ out_be32(targetp, request_tcpu) ;
+ verify_tcpu=get_tcpu_for_tnum(group,tnum) ;
+ while(request_tcpu != verify_tcpu)
+ {
+ /* If another CPU changed the target for an interrupt while we were writing, pick up the change */
+ /* and set the hw register appropriately. Eventually the last writer should reflect what */
+ /* everyone wants. */
+ request_tcpu = verify_tcpu ;
+ printk(KERN_NOTICE "irq=0x%02x set=%x redo request_tcpu=%08x\n", irq,BIC_TARGET_NORMAL(cpu),request_tcpu) ;
+ out_be32(targetp, request_tcpu) ;
+ verify_tcpu=get_tcpu_for_tnum(group,tnum) ;
+ }
+
+ }
+
+}
+/* Byte-granular variant of bic_unmask_irq: the target registers are
+ * addressed as bytes (two 4-bit nibbles each; tnum selects the byte,
+ * tidx the nibble), so only one byte is rewritten per update. Same
+ * shadow-rebuild / write / verify scheme as the word-wide version. */
+static void bic_unmask_irq_bytewise(unsigned int irq)
+{
+ unsigned group = bic_irq_to_hwgroup(irq);
+ unsigned gint = bic_irq_to_hwgint(irq);
+ unsigned tnum = gint >> 1;
+ unsigned tidx = gint & 1;
+/* unsigned orig, tmask, tcpu; */
+ unsigned tmask;
+ unsigned char *basep = (unsigned char *)(bic.regs->group[group].target) ;
+ unsigned char *targetp = basep+tnum ;
+ unsigned cpu;
+ unsigned int request_tcpu ;
+ unsigned int verify_tcpu ;
+
+ spin_lock(&bic.mask_lock);
+ bic.enabled_mask[group] |= 0x80000000 >> gint ; /* Note that this interrupt is enabled */
+ spin_unlock(&bic.mask_lock);
+
+ tmask= ~(0xf << (1-tidx)*4);
+
+ if (group == 0 /*is_ipi(group, gint)*/) {
+ /* These bits are magic. We know they are for IPIs
+ * and must direct them to the correct core.
+ */
+ cpu = ipi_gint_cpu(gint);
+ } else {
+ cpu = BIC_DEFAULT_CPU;
+ }
+
+
+ {
+ unsigned int hwirq = bic_irq_to_hwirq(irq) ;
+ unsigned int tgtcpu=intended_cpu_for_irq[hwirq] ; /* Note .. 'cpu' has the b'0100' bit set already if appropriate */
+ set_cpu_for_hwirq(hwirq,tgtcpu) ;
+ request_tcpu=get_tcpu_for_tnum_byte(group,tnum) ;
+/* BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d targtval=0x%08x request_tcpy=0x%08x\n", */
+/* irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu,(orig & tmask)|tcpu, request_tcpu)) ; */
+ BIC_DIAG(printk(KERN_INFO "bic_unmask_irq irq=0x%02x hwirq=0x%02x group=0x%02x tnum=0x%02x gint=0x%02x tmask=0x%08x targetp=%p cpu=%d tgtcpu=%d request_tcpy=0x%08x\n",
+ irq,hwirq,group,tnum,gint,tmask,targetp,cpu,tgtcpu, request_tcpu)) ;
+
+ out_be8(targetp, request_tcpu) ;
+ verify_tcpu=get_tcpu_for_tnum_byte(group,tnum) ;
+ while(request_tcpu != verify_tcpu)
+ {
+ /* If another CPU changed the target for an interrupt while we were writing, pick up the change */
+ /* and set the hw register appropriately. Eventually the last writer should reflect what */
+ /* everyone wants. */
+ request_tcpu = verify_tcpu ;
+ printk(KERN_NOTICE "irq=0x%02x set=%x redo request_tcpu=%08x\n", irq,BIC_TARGET_NORMAL(cpu),request_tcpu) ;
+ out_be8(targetp, request_tcpu) ;
+ verify_tcpu=get_tcpu_for_tnum_byte(group,tnum) ;
+ }
+
+ }
+
+}
+
+/*
+ * Masking an IRQ will disable it.
+ * We do this by changing the target to disable. This works for IPI bits,
+ *
+ * Clears this interrupt's 4-bit nibble in the target register (tnum =
+ * register word, tidx = nibble) and the corresponding software enable
+ * bit. The register read-modify-write is done under mask_lock so it
+ * cannot race another mask/unmask on the same word.
+ */
+static void bic_mask_irq(unsigned int irq)
+{
+ unsigned group = bic_irq_to_hwgroup(irq);
+ unsigned gint = bic_irq_to_hwgint(irq);
+ unsigned tnum = gint >> 3;
+ unsigned tidx = gint & 7;
+ unsigned orig, tmask;
+ uint32_t *targetp = &bic.regs->group[group].target[tnum];
+
+ tmask = BIC_TARGET_MASK << (7-tidx)*4;
+ BIC_DIAG(printk(KERN_INFO "bic_mask_irq irq=0x%02x group=0x%02x gint=0x%02x tmask=0x%02x\n",
+ irq,group,gint,tmask)) ;
+ spin_lock(&bic.mask_lock);
+ bic.enabled_mask[group] &= 0xffffffff ^ (0x80000000 >> gint) ; /* Note that this interrupt is disabled */
+ orig = in_be32(targetp);
+ out_be32(targetp, orig & ~tmask);
+ spin_unlock(&bic.mask_lock);
+}
+
+/* Byte-granular variant of bic_mask_irq: clears the nibble via an 8-bit
+ * read-modify-write of the target byte, and zeroes the in-memory shadow
+ * entry so later rebuilds see the interrupt as untargeted. */
+static void bic_mask_irq_bytewise(unsigned int irq)
+{
+ unsigned int hwirq = bic_irq_to_hwirq(irq) ;
+ unsigned group = bic_irq_to_hwgroup(irq);
+ unsigned gint = bic_irq_to_hwgint(irq);
+ unsigned tnum = gint >> 1;
+ unsigned tidx = gint & 1;
+ unsigned orig, tmask;
+ unsigned char *basep = (unsigned char *)(bic.regs->group[group].target) ;
+ unsigned char *targetp = basep+tnum ;
+
+ set_cpu_for_hwirq(hwirq,0) ;
+ tmask = BIC_TARGET_MASK << ((1-tidx)*4);
+ BIC_DIAG(printk(KERN_INFO "bic_mask_irq irq=0x%02x group=0x%02x gint=0x%02x tmask=0x%02x\n",
+ irq,group,gint,tmask)) ;
+ spin_lock(&bic.mask_lock);
+ bic.enabled_mask[group] &= 0xffffffff ^ (0x80000000 >> gint) ; /* Note that this interrupt is disabled */
+ orig = in_be8(targetp);
+ out_be8(targetp, orig & ~tmask);
+ spin_unlock(&bic.mask_lock);
+}
+
+/*
+ * End an interrupt. We just need to write the bit to be cleared
+ * and the hardware handles it. No locking needed.
+ *
+ * status_clr is a write-one-to-clear register; the mb() ensures the
+ * clear has been posted before the handler returns (avoids an immediate
+ * spurious re-interrupt).
+ */
+static void bic_eoi_irq(unsigned int irq)
+{
+ unsigned group = bic_irq_to_hwgroup(irq);
+ unsigned gint = bic_irq_to_hwgint(irq);
+ uint32_t gintbits = 1 << (31 - gint);
+/* BIC_DIAG(printk(KERN_INFO "bic_eoi_irq irq=0x%02x group=0x%02x gint=0x%02x \n",irq,group,gint)) ; */
+
+ out_be32(&bic.regs->group[group].status_clr, gintbits);
+ mb();
+}
+
+/* Return the hardware cpu index as needed by the bic.
+ * Currently this matches smp_processor_id(), but we do this explicitly
+ * in case we ever want to virtualize the processor id.
+ * Reads SPR 0x11e directly (the processor-id SPR on this core --
+ * NOTE(review): confirm this is PIR on the 450). */
+static inline unsigned this_cpu(void)
+{
+ unsigned cpu;
+ asm volatile("mfspr %0, 0x11e" : "=r" (cpu));
+ return cpu;
+}
+
+/* Return 0..32 counting from the left (same as bic). 32=> no bit set.
+ * Could use bitops.h as long as it always matches the bic.
+ * cntlzw counts leading zeros, which is exactly the left-numbered index
+ * of the first set bit (and 32 when x == 0). */
+static inline unsigned bic_find_first_bit(unsigned x)
+{
+ unsigned lz;
+ asm("cntlzw %0,%1" : "=r" (lz) : "r" (x));
+ return lz;
+}
+
+/*
+ * Get an IRQ from the BIC.
+ * We analyze the normal hierarchy register to find which group has caused an
+ * interrupt. Similarily, we find the first bit within a group to find the first
+ * source of interrupt. This artificially prioritizes interrupts.
+ *
+ * We handle IPIs specially. This core can see IPI bits which did not actually
+ * interrupt this core. We mask off those bits and otherwise process normally.
+ *
+ * Returns NO_IRQ when no (enabled) interrupt is pending.
+ */
+unsigned int bgp_get_irq(void)
+{
+ unsigned thiscpu = this_cpu();
+ unsigned nhier, group, gint;
+ uint32_t gintbits;
+ int irq = NO_IRQ;
+
+ /* Per-cpu hierarchy register: one bit per group with a pending irq. */
+ nhier = in_be32(&(bic.regs->hier_normal[thiscpu]));
+ group = bic_find_first_bit(nhier);
+ if (group >= NR_BIC_GROUPS)
+ goto out;
+ {
+ /* Raw status can show bits that are not enabled; filter through
+  * the software enable mask maintained by mask/unmask. */
+ gintbits = in_be32(&bic.regs->group[group].status) & bic.enabled_mask[group] ;
+ if (group == BIC_IPI_GROUP) {
+ /* This may be an IPI. Mask out other cpu IPI bits so we don't try
+ * to handle it on this core! We don't mask the other 16 bits.
+ */
+ unsigned mask = ipi_mask(thiscpu);
+ gintbits &= mask;
+ }
+ gint = bic_find_first_bit(gintbits);
+ }
+ if (gint >= NR_BIC_GINTS)
+ goto out;
+ irq = bic_hw_to_irq(group, gint);
+out:
+/* BIC_DIAG(printk(KERN_INFO "bgp_get_irq nhier=0x%02x group=0x%02x gintbits=0x%08x gint=0x%02x irq=0x%02x\n", */
+/* nhier,group,gintbits,gint,irq)) ; */
+ return irq;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Send an IPI to another cpu.
+ * This could be coded to send to a cpu mask.
+ */
+enum {
+ k_spinlimit = 1000000 , /* max polls of the status bit before giving up */
+ k_reportlimit = 100 /* cap on "stuck" warnings to avoid log flood */
+};
+/* Count of "stuck" warnings already printed (bounded by k_reportlimit). */
+static unsigned int reportcount ;
+void bgp_send_ipi(int cpu, int msg)
+{
+ unsigned group = BIC_IPI_GROUP;
+ unsigned gint = ipi_to_irq(cpu, msg) & 0x1f;
+ uint32_t gintbits = 1 << (31 - gint);
+ uint32_t ngintbits;
+ unsigned int spincount = 0 ;
+
+ /* If this interrupt is already raised we must wait for it to complete else
+ * we might race with the ack by the other waiting cpu.
+ * Once it is clear there is no guarantee another cpu won't take it in tandem
+ * with this cpu. Currently that is ok, because a reschedule race is harmless
+ * as the goal of rescheduling is met, and the others hold a lock while the
+ * operation is in progress. Why doesn't the lock protect us? There is a window
+ * between the lock release and the IPI interrupt ack where we will race.
+ * This plugs the race. It may be better to reallocate the IPI bits for unique
+ * core-to-core combinations.
+ */
+ do {
+ spincount += 1 ;
+ ngintbits = in_be32(&bic.regs->group[group].status);
+ } while ( (ngintbits & gintbits) && (spincount < k_spinlimit) ) ;
+
+ /* Pull the interrupt. */
+ if( spincount < k_spinlimit)
+ {
+ out_be32(&bic.regs->group[group].status_set, gintbits);
+ }
+ else
+ {
+ /* Previous IPI never drained: drop this one and warn (rate-limited). */
+ if(reportcount < k_reportlimit)
+ {
+ printk(KERN_WARNING "bgp_send_ipi cpu=%d msg=%d stuck\n", cpu, msg) ;
+ reportcount += 1;
+ }
+ }
+}
+
+/* Initialize an IPI handler. This is only here to use ipi_to_irq(), which
+ * could be exposed in bluegene.h.
+ * Registers the standard SMP message handler for the (cpu, msg) IPI irq. */
+void bgp_init_IPI(int cpu, int msg)
+{
+ smp_request_message_ipi(ipi_to_irq(cpu, msg), msg);
+}
+#endif
+
+/* Initialize the bic.
+ * We set the handlers as percpu because bic interrupts are wired
+ * to specific cores (we never broadcast to all cores).
+ */
+/* Quiesce the BIC at boot: route every interrupt target to 'disabled'
+ * (all-zero nibbles) and clear the software enable mask for each group.
+ * Use out_be32 for the target registers, as everywhere else in this
+ * file, so the MMIO stores cannot be reordered or elided (the previous
+ * plain struct assignments had no such guarantee). */
+static void __init disable_all_bic_interrupts(void)
+{
+ int group ;
+ int tnum ;
+ struct bic_regs * regs = bic.regs ;
+ for(group=0; group<NR_BIC_GROUPS; group += 1)
+ {
+ struct bic_group_regs *group_regs = regs->group+group ;
+ for(tnum=0; tnum<4; tnum += 1)
+ {
+ out_be32(&group_regs->target[tnum], 0) ;
+ }
+ bic.enabled_mask[group] = 0 ;
+ }
+}
+/* Map the BIC registers, quiesce all sources, and install the percpu
+ * flow handler for every irq. */
+void __init bgp_init_IRQ(void)
+{
+ int irq;
+
+ bic.regs = ioremap(BIC_PHYS, sizeof(*bic.regs));
+ BUG_ON(!bic.regs); /* no interrupt controller => nothing useful we can do */
+ disable_all_bic_interrupts() ;
+ /* spin_lock_init() is the supported runtime initializer; assigning
+ * SPIN_LOCK_UNLOCKED is deprecated and breaks with lockdep. */
+ spin_lock_init(&bic.mask_lock);
+ for_each_irq(irq) {
+ /* Interrupts from the BIC are percpu (we don't use broadcast)
+ * so we may as well take the cycle advantage and declare it.
+ */
+ set_irq_chip_and_handler(irq, &bgp_irq_chip, handle_percpu_irq);
+ }
+}
+
+EXPORT_SYMBOL(bic) ;
+EXPORT_SYMBOL(bic_set_cpu_for_irq) ;
diff --git a/arch/powerpc/platforms/44x/bgp_cns.c b/arch/powerpc/platforms/44x/bgp_cns.c
new file mode 100644
index 00000000000000..1cccd1962589c9
--- /dev/null
+++ b/arch/powerpc/platforms/44x/bgp_cns.c
@@ -0,0 +1,244 @@
+/*
+ * Blue Gene/P Common Node Services (CNS) wrappers
+ *
+ * These are declared in asm/bluegene.h but implemented here.
+ *
+ * Copyright 2003-2009 International Business Machines, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * Author: Todd Inglett <tinglett@us.ibm.com>
+ */
+
+#include <linux/init.h>
+#include <linux/of_platform.h>
+#include <asm/pgtable.h>
+#include <asm/bluegene.h>
+#include <asm/bgcns.h>
+
+/* The descriptor for CNS identifies location and entry point of firmware.
+ * We re-build it from data passed through the ibm,bluegene-cns device tree entry.
+ */
+BGCNS_Descriptor bgcnsd;
+
+/* These functions spin on specific errors when we can't print messages.
+ * They make it easy to find the cause of the error by finding the iar in the
+ * kernel System.map.
+ * One spinner per missing device-tree property; noinline keeps each at a
+ * distinct, recognizable address. */
+static void noinline __init bgp_fatal_no_ibm_bluegene_cns(void) { for (;;); }
+static void noinline __init bgp_fatal_no_base_va(void) { for (;;); }
+static void noinline __init bgp_fatal_no_base_pa(void) { for (;;); }
+static void noinline __init bgp_fatal_no_services(void) { for (;;); }
+static void noinline __init bgp_fatal_no_size(void) { for (;;); }
+static void noinline __init bgp_fatal_no_version(void) { for (;;); }
+
+/* Get the descriptor for CNS from the device tree.
+ * Don't inline so we can make out the stack trace easier when it isn't working.
+ * Each missing property spins in its own uniquely-named fatal function so
+ * the failure can be identified from the iar via System.map.
+ */
+static void noinline __init get_cns_descriptor(BGCNS_Descriptor *bgcnsd)
+{
+ int len;
+ const unsigned *reg;
+ struct device_node *devcns = of_find_node_by_path("/ibm,bluegene/cns");
+
+ if (!devcns) bgp_fatal_no_ibm_bluegene_cns();
+
+ reg = of_get_property(devcns, "base-va", &len);
+ if (!reg) bgp_fatal_no_base_va();
+ bgcnsd->baseVirtualAddress = *reg;
+ reg = of_get_property(devcns, "base-pa", &len);
+ if (!reg) bgp_fatal_no_base_pa();
+ bgcnsd->basePhysicalAddress = *reg;
+ bgcnsd->basePhysicalAddressERPN = 0; /* assumes DDR <= 4G */
+ reg = of_get_property(devcns, "services", &len);
+ if (!reg) bgp_fatal_no_services();
+ bgcnsd->services = (void *)(*reg);
+ reg = of_get_property(devcns, "size", &len);
+ if (!reg) bgp_fatal_no_size();
+ bgcnsd->size = *reg;
+ reg = of_get_property(devcns, "version", &len);
+ if (!reg) bgp_fatal_no_version();
+ bgcnsd->version = *reg;
+
+ /* of_find_node_by_path() takes a reference on the node; drop it now
+ * that all properties have been copied out (was previously leaked). */
+ of_node_put(devcns);
+}
+
+void __init ppc44x_update_tlb_hwater(void); /* from mm/44x_mmu.c */
+
+/* Pin the CNS firmware region with a dedicated 256K TLB entry via the
+ * 44x tlbwe instruction: word0 = EPN/valid/size, word1 = RPN/ERPN,
+ * word2 = supervisor RWX + M/WL1/U2 attributes. Entry 62 is the slot
+ * reserved among PPC44x_EARLY_TLBS in asm/mmu-44x.h. */
+static void noinline __init map_cns(BGCNS_Descriptor *bgcnsd)
+{
+ unsigned word0, word1, word2;
+ int entry = 62; /* We reserve one of the PPC44x_EARLY_TLBS in asm/mmu-44x.h */
+
+ word0 = (bgcnsd->baseVirtualAddress & 0xfffff000) | PPC44x_TLB_VALID | PPC44x_TLB_256K;
+ word1 = (bgcnsd->basePhysicalAddress & 0xfffff000) | (bgcnsd->basePhysicalAddressERPN & 0xf);
+ word2 = PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_M | PPC44x_TLB_WL1 | PPC44x_TLB_U2;
+ __asm__ __volatile__(
+ "tlbwe %1,%0,0\n"
+ "tlbwe %2,%0,1\n"
+ "tlbwe %3,%0,2\n"
+ "isync\n" : : "r" (entry), "r" (word0), "r" (word1), "r" (word2));
+}
+
+extern int map_page(unsigned long va, phys_addr_t pa, int flags);
+
+/* Locate and map the CNS firmware. On the first call (bgcnsd.size == 0)
+ * the descriptor is read from the device tree and kernel page mappings
+ * are created; every call (re)installs the pinned TLB entry via map_cns.
+ * NOTE(review): since v starts equal to v_start, the loop condition
+ * 'v > v_start' is false on the very first iteration, so map_page is
+ * never actually called -- the region appears to be covered only by the
+ * pinned TLB from map_cns(). Confirm whether 'v >= v_start' (starting one
+ * page later) was intended. */
+void __init bgp_init_cns(void)
+{
+ unsigned long v_start, v_end, v, p;
+
+ if (bgcnsd.size == 0) {
+ /* Get the descriptor, map CNS, and tell Linux about the mapping. */
+ get_cns_descriptor(&bgcnsd);
+ v_start = bgcnsd.baseVirtualAddress;
+ v_end = v_start + bgcnsd.size;
+ v_start -= PAGE_SIZE; /* hack: reserve 1 extra page */
+ v = v_start;
+ p = bgcnsd.basePhysicalAddress; /* always < 4G */
+ /* We must be careful because we could hit 4G and wrap to v == 0.
+ * Hence the v > v_start check.
+ */
+ for (; v < v_end && v > v_start; v += PAGE_SIZE, p += PAGE_SIZE)
+ map_page(v, p, _PAGE_RAM_TEXT);
+ }
+ map_cns(&bgcnsd);
+}
+
+/* Simple udbg_putc. We perform rudimentary buffering so it is readable.
+ * Characters accumulate in bgp_udbg_buf and are flushed to the mailbox
+ * console on newline or when the buffer fills; output is dropped (buffer
+ * reset, nothing sent) until CNS is mapped (bgcnsd.size != 0). */
+static int bgp_udbg_cur = 0;
+static char bgp_udbg_buf[256];
+void bgp_udbg_putc(char c)
+{
+ bgp_udbg_buf[bgp_udbg_cur++] = c;
+ if (c == '\n' || bgp_udbg_cur >= sizeof(bgp_udbg_buf)) {
+ if (bgcnsd.size)
+ bluegene_writeToMailboxConsole(bgp_udbg_buf, bgp_udbg_cur);
+ bgp_udbg_cur = 0;
+ }
+}
+
+
+/* Invoke a CNS firmware service through the services table, with local
+ * interrupts disabled for the duration of the call (the firmware entry
+ * points are presumably not interrupt-safe -- NOTE(review): confirm).
+ * Evaluates to the service's return value. */
+#define CALLCNS(service) \
+ ({ unsigned flags; \
+ typeof(bgcnsd.services->service) ret; \
+ local_save_flags(flags); \
+ local_irq_disable(); \
+ ret = bgcnsd.services->service; \
+ local_irq_restore(flags); \
+ ret; \
+ })
+
+
+/* This returns non-zero if there is something in an input mailbox. */
+int bluegene_testInboxAttention(void)
+{
+ /* ToDo: this should be fast. Read the DCR directly. */
+ return CALLCNS(testInboxAttention());
+}
+
+/* Poll the CNS for outbox completion; see bgcns.h for the return
+ * convention of testForOutboxCompletion. */
+int bluegene_testForOutboxCompletion(void)
+{
+ return CALLCNS(testForOutboxCompletion());
+}
+
+/* Queue a binary RAS event without blocking; details[] carries
+ * numDetails words of event-specific data. */
+int bluegene_writeRASEvent_nonBlocking(unsigned facility,
+ unsigned unit,
+ unsigned short err_code,
+ unsigned numDetails,
+ unsigned details[])
+{
+ return CALLCNS(writeRASEvent_nonBlocking(facility, unit, err_code, numDetails, details));
+}
+
+/* Send a textual RAS event (blocking variant). */
+int bluegene_writeRASString(unsigned facility,
+ unsigned unit,
+ unsigned short err_code,
+ char* str)
+{
+ return CALLCNS(writeRASString(facility, unit, err_code, str));
+}
+
+/* Send a textual RAS event without blocking. */
+int bluegene_writeRASString_nonBlocking(unsigned facility,
+ unsigned unit,
+ unsigned short err_code,
+ char* str)
+{
+ return CALLCNS(writeRASString_nonBlocking(facility, unit, err_code, str));
+}
+
+/* Write msglen bytes of console output to the service-node mailbox. */
+int bluegene_writeToMailboxConsole(char *msg, unsigned msglen)
+{
+ return CALLCNS(writeToMailboxConsole(msg, msglen));
+}
+
+/* Non-blocking variant of the mailbox console write. */
+int bluegene_writeToMailboxConsole_nonBlocking(char *msg, unsigned msglen)
+{
+ return CALLCNS(writeToMailboxConsole_nonBlocking(msg, msglen));
+}
+
+/* Read up to bufsize bytes of console input from the mailbox; returns
+ * the number of bytes placed in buf. */
+unsigned bluegene_readFromMailboxConsole(char *buf, unsigned bufsize)
+{
+ return CALLCNS(readFromMailboxConsole(buf, bufsize));
+}
+
+/* Reset the Ethernet MAC's PHY via CNS. */
+int bluegene_macResetPHY(void)
+{
+ return CALLCNS(macResetPHY());
+}
+ /* ! @brief Tests the MAC unit's link but does not block. */
+ /* ! @param[in] link_type specifies the type of link to be tested. */
+ /* ! @param[out] result points to the link status, which is valid only when the return code is */
+ /* ! BGCNS_RC_COMPLETE. A value of one (1) indicates that the link is active; zero (0) */
+ /* ! indicates that it is inactive. */
+ /* ! @param[in] reset indicates whether this is the beginning (1) or a continuation (0) of a */
+ /* ! test link sequence. That is, callers should initiate a sequence with reset=1 and then */
+ /* ! if receiving a return code of BGCNS_RC_CONTINUE, should invoke this service again with */
+ /* ! reset=0. */
+ /* ! @param[in] timeoutInMillis the (approximate) number of milliseconds that this service can have */
+ /* ! before returning. If the allotted time is not sufficient, the service will return BGCNS_RC_CONTINUE */
+ /* ! to indicate that it needs additional time. */
+ /* ! @return BGCNS_RC_COMPLETE if the test is complete (result is valid only in this case). BGCNS_RC_CONTINUE */
+ /* ! if the reset operation is not yet complete. BGCNS_RC_ERROR if the reset operation failed. */
+ /* NOTE(review): the declaration below was pasted here from the CNS
+ * services table in bgcns.h together with the comments above. At file
+ * scope it was a tentative definition of an uninitialized global
+ * function pointer that nothing references; it is disabled here. The
+ * real non-blocking service remains reachable via bgcnsd.services. */
+ /* int (*macTestLink_nonBlocking)(BGCNS_LinkType link_type, unsigned* result, int reset, unsigned timeoutInMillis); */
+
+
+/* Test the receive side of the MAC link via the CNS macTestLink service
+ * (cf. the non-blocking variant described in bgcns.h). */
+int bluegene_macTestRxLink(void)
+{
+ return CALLCNS(macTestLink(BGCNS_Receiver));
+}
+
+
+/* Test the transmit side of the MAC link via CNS macTestLink. */
+int bluegene_macTestTxLink(void)
+{
+ return CALLCNS(macTestLink(BGCNS_Transmitter));
+}
+
+/* Ask CNS to start the given cpu executing entry(cpu, arg); used for
+ * secondary-cpu bring-up. Returns the CNS status code. */
+int bluegene_takeCPU(unsigned cpu, void *arg, void (*entry)(unsigned cpu, void *arg))
+{
+ return CALLCNS(takeCPU(cpu, arg, entry));
+}
+
+
+/* Copy the node personality into buff, returning the number of bytes
+ * copied (at most buffSize). Interrupts stay off across the two CNS
+ * calls so size and data are read consistently. */
+int bluegene_getPersonality(void *buff, unsigned buffSize)
+{
+ int sz;
+ unsigned flags;
+
+ local_save_flags(flags);
+ local_irq_disable();
+ sz = bgcnsd.services->getPersonalitySize();
+ /* Clamp in the signed domain first: the old 'sz > buffSize' promoted
+ * a (possible) negative sz to a huge unsigned value. Treat an error
+ * or negative size as "nothing to copy". */
+ if (sz < 0)
+ sz = 0;
+ else if ((unsigned)sz > buffSize)
+ sz = buffSize;
+ memcpy(buff, bgcnsd.services->getPersonalityData(), sz);
+ local_irq_restore(flags);
+
+ return sz;
+}
+
+/* Map the XEMAC device registers at baseAddr via the CNS mapDevice
+ * service. */
+int bluegene_mapXEMAC(void* baseAddr)
+{
+ return CALLCNS(mapDevice(BGCNS_XEMAC, baseAddr));
+}
+
+EXPORT_SYMBOL(bluegene_getPersonality) ;
+EXPORT_SYMBOL(bgcnsd) ;
diff --git a/arch/powerpc/platforms/44x/bgp_pers.c b/arch/powerpc/platforms/44x/bgp_pers.c
new file mode 100644
index 00000000000000..431666565b4d8d
--- /dev/null
+++ b/arch/powerpc/platforms/44x/bgp_pers.c
@@ -0,0 +1,345 @@
+/*
+ *
+ * Blue Gene personality /proc interface with the control system
+ *
+ * Copyright 2003,2005 International Business Machines
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * User apps can mmap /proc/personality to directly access the binary
+ * personality in SRAM (see bglpersonality.h), or they can read
+ * /proc/personality.sh which expands to shell commands (so it can be sourced)
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+
+#include <asm/bluegene.h>
+#include <asm/bgp_personality.h>
+
+
+/* /proc entries created at init: /proc/personality (binary, mmap-able)
+ * and /proc/personality.sh (shell-sourceable text). */
+static struct proc_dir_entry *personality_proc_entry = NULL;
+static struct proc_dir_entry *personality_sh_proc_entry = NULL;
+
+
+/* Kernel-side cache of the personality, refreshed on each read. */
+static BGP_Personality_t bgpers;
+
+
+
+/* seq_file iterator start: positions 0..32 are valid output lines. */
+static void* bgpers_sh_seq_start(struct seq_file* f,
+ loff_t* pos)
+{
+ if (*pos > 32)
+ return NULL;
+ return (void*) pos;
+}
+
+
+/* Advance the iterator; stop after position 32. */
+static void* bgpers_sh_seq_next(struct seq_file* f,
+ void* v,
+ loff_t* pos)
+{
+ (*pos)++;
+ if (*pos > 32)
+ return NULL;
+ return (void*) pos;
+}
+
+
+/* Nothing to release: all iteration state lives in *pos. */
+static void bgpers_sh_seq_stop(struct seq_file* f,
+ void* v)
+{
+}
+
+
+/* Produce a personality in a form parsable by a shell. */
+static int bgpers_sh_seq_show(struct seq_file* f,
+ void* v)
+{
+ loff_t offset = *((loff_t*) v);
+ BGP_UCI_ComputeCard_t* uci;
+
+ bluegene_getPersonality(&bgpers, sizeof(bgpers));
+ uci = (BGP_UCI_ComputeCard_t*) &bgpers.Kernel_Config.UniversalComponentIdentifier;
+
+ switch((unsigned long) offset) {
+ case 0:
+ seq_printf(f, "BG_UCI=%08x\n",
+ bgpers.Kernel_Config.UniversalComponentIdentifier);
+ break;
+ case 1:
+ seq_printf(f, "BG_LOCATION=R%1x%1x-M%c-N%02d-J%02d\n",
+ uci->RackRow, uci->RackColumn, (uci->Midplane ? '1' : '0'),
+ uci->NodeCard, uci->ComputeCard);
+ break;
+ case 2:
+ seq_printf(f, "BG_MAC=%02x:%02x:%02x:%02x:%02x:%02x\n",
+ bgpers.Ethernet_Config.EmacID[0],
+ bgpers.Ethernet_Config.EmacID[1],
+ bgpers.Ethernet_Config.EmacID[2],
+ bgpers.Ethernet_Config.EmacID[3],
+ bgpers.Ethernet_Config.EmacID[4],
+ bgpers.Ethernet_Config.EmacID[5]);
+ break;
+ case 3:
+ seq_printf(f, "BG_IP=%d.%d.%d.%d\n",
+ bgpers.Ethernet_Config.IPAddress.octet[12],
+ bgpers.Ethernet_Config.IPAddress.octet[13],
+ bgpers.Ethernet_Config.IPAddress.octet[14],
+ bgpers.Ethernet_Config.IPAddress.octet[15]);
+ break;
+ case 4:
+ seq_printf(f, "BG_NETMASK=%d.%d.%d.%d\n",
+ bgpers.Ethernet_Config.IPNetmask.octet[12],
+ bgpers.Ethernet_Config.IPNetmask.octet[13],
+ bgpers.Ethernet_Config.IPNetmask.octet[14],
+ bgpers.Ethernet_Config.IPNetmask.octet[15]);
+ break;
+ case 5:
+ seq_printf(f, "BG_BROADCAST=%d.%d.%d.%d\n",
+ bgpers.Ethernet_Config.IPBroadcast.octet[12],
+ bgpers.Ethernet_Config.IPBroadcast.octet[13],
+ bgpers.Ethernet_Config.IPBroadcast.octet[14],
+ bgpers.Ethernet_Config.IPBroadcast.octet[15]);
+ break;
+ case 6:
+ seq_printf(f, "BG_GATEWAY=%d.%d.%d.%d\n",
+ bgpers.Ethernet_Config.IPGateway.octet[12],
+ bgpers.Ethernet_Config.IPGateway.octet[13],
+ bgpers.Ethernet_Config.IPGateway.octet[14],
+ bgpers.Ethernet_Config.IPGateway.octet[15]);
+ break;
+ case 7:
+ seq_printf(f, "BG_MTU=%d\n", bgpers.Ethernet_Config.MTU);
+ break;
+ case 8:
+ seq_printf(f, "BG_FS=%d.%d.%d.%d\n",
+ bgpers.Ethernet_Config.NFSServer.octet[12],
+ bgpers.Ethernet_Config.NFSServer.octet[13],
+ bgpers.Ethernet_Config.NFSServer.octet[14],
+ bgpers.Ethernet_Config.NFSServer.octet[15]);
+ break;
+ case 9:
+ seq_printf(f, "BG_EXPORTDIR=\"%s\"\n", bgpers.Ethernet_Config.NFSExportDir);
+ break;
+ case 10:
+ seq_printf(f, "BG_SIMULATION=%d\n",
+ (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_Simulation ? 1 : 0));
+ break;
+ case 11:
+ seq_printf(f, "BG_PSETNUM=%d\n", bgpers.Network_Config.PSetNum);
+ break;
+ case 12:
+ seq_printf(f, "BG_NUMPSETS=%d\n", bgpers.Network_Config.IOnodes);
+ break;
+ case 13:
+ seq_printf(f, "BG_NODESINPSET=%d\n", bgpers.Network_Config.PSetSize);
+ break;
+ case 14:
+ seq_printf(f, "BG_XSIZE=%d\n", bgpers.Network_Config.Xnodes);
+ break;
+ case 15:
+ seq_printf(f, "BG_YSIZE=%d\n", bgpers.Network_Config.Ynodes);
+ break;
+ case 16:
+ seq_printf(f, "BG_ZSIZE=%d\n", bgpers.Network_Config.Znodes);
+ break;
+ case 17:
+ seq_printf(f, "BG_VERBOSE=%d", (bgpers.Kernel_Config.TraceConfig & BGP_TRACE_VERBOSE) ? 1 : 0);
+ break;
+ case 18:
+ switch (bgpers.Network_Config.PSetSize) {
+ case 16:
+ seq_printf(f, "BG_PSETSIZE=\"4 2 2\"\n");
+ break;
+ case 32:
+ seq_printf(f, "BG_PSETSIZE=\"4 4 2\"\n");
+ break;
+ case 64:
+ seq_printf(f, "BG_PSETSIZE=\"4 4 4\"\n");
+ break;
+ case 128:
+ seq_printf(f, "BG_PSETSIZE=\"4 4 8\"\n");
+ break;
+ case 256:
+ seq_printf(f, "BG_PSETSIZE=\"8 4 8\"\n");
+ break;
+ case 512:
+ seq_printf(f, "BG_PSETSIZE=\"8 8 8\"\n");
+ break;
+ default:
+ seq_printf(f, "BG_PSETSIZE=\"? ? ?\"\n");
+ }
+ break;
+ case 19:
+/* if (bgpers.Network_Config.RankInPSet) */
+/* // Not an IO node so display pset origin. */
+ seq_printf(f, "BG_PSETORG=\"%d %d %d\"\n",
+ bgpers.Network_Config.Xcoord,
+ bgpers.Network_Config.Ycoord,
+ bgpers.Network_Config.Zcoord);
+ break;
+ case 20:
+ seq_printf(f, "BG_CLOCKHZ=%d\n", bgpers.Kernel_Config.FreqMHz);
+ break;
+ case 21:
+ seq_printf(f, "BG_GLINTS=%d\n",
+ (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_GlobalInts) ? 1 : 0);
+ break;
+ case 22:
+ seq_printf(f, "BG_ISTORUS=\"%s%s%s\"\n",
+ (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshX) ? "X" : "",
+ (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshY) ? "Y" : "",
+ (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_TorusMeshZ) ? "Z" : "");
+ break;
+ case 23: {
+ char blockID[BGP_PERSONALITY_LEN_NFSDIR+1];
+
+ strncpy(blockID, bgpers.Ethernet_Config.NFSMountDir, sizeof(blockID));
+ blockID[sizeof(blockID)-1] = '\0';
+ seq_printf(f, "BG_BLOCKID=\"%s\"\n", blockID);
+ break;
+ }
+ case 24:
+ seq_printf(f, "BG_SN=%d.%d.%d.%d\n",
+ bgpers.Ethernet_Config.serviceNode.octet[12],
+ bgpers.Ethernet_Config.serviceNode.octet[13],
+ bgpers.Ethernet_Config.serviceNode.octet[14],
+ bgpers.Ethernet_Config.serviceNode.octet[15]);
+ break;
+ case 25:
+ seq_printf(f, "BG_IS_IO_NODE=%d\n", (bgpers.Network_Config.RankInPSet ? 0 : 1));
+ break;
+ case 26:
+ seq_printf(f, "BG_RANK_IN_PSET=%d\nBG_RANK=%d\n",
+ bgpers.Network_Config.RankInPSet,
+ bgpers.Network_Config.Rank);
+ break;
+ case 27:
+ seq_printf(f, "BG_IP_OVER_COL=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverCollective) ? 1 : 0);
+ break;
+ case 28:
+ seq_printf(f, "BG_IP_OVER_TOR=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverTorus) ? 1 : 0);
+ break;
+ case 29:
+ seq_printf(f, "BG_IP_OVER_COL_VC=%d\n", (bgpers.Block_Config & BGP_PERS_BLKCFG_IPOverCollectiveVC) ? 1 : 0);
+ break;
+ case 30:
+ if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_MuxOnly))
+ seq_printf(f, "BG_CIO_MODE=MUX_ONLY\n");
+ else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_None))
+ seq_printf(f, "BG_CIO_MODE=NONE\n");
+ else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_CIOModeSel(3)) == BGP_PERS_BLKCFG_CIOModeSel(BGP_PERS_BLKCFG_CIOMode_Full))
+ seq_printf(f, "BG_CIO_MODE=FULL\n");
+ else
+ seq_printf(f, "BG_CIO_MODE=UNKNOWN\n");
+ break;
+ case 31:
+ if ((bgpers.Block_Config & BGP_PERS_BLKCFG_bgsysFSSel(3)) == BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv3))
+ seq_printf(f, "BG_BGSYS_FS_TYPE=NFSv3\n");
+ else if ((bgpers.Block_Config & BGP_PERS_BLKCFG_bgsysFSSel(3)) == BGP_PERS_BLKCFG_bgsysFSSel(BGP_PERS_BLKCFG_bgsys_NFSv4))
+ seq_printf(f, "BG_BGSYS_FS_TYPE=NFSv4\n");
+ else
+ seq_printf(f, "BG_BGSYS_FS_TYPE=UNKNOWN\n");
+ break;
+ case 32:
+ seq_printf(f, "BG_HTC_MODE=%d\n",
+ (bgpers.Kernel_Config.NodeConfig & BGP_PERS_ENABLE_HighThroughput) ? 1 : 0);
+ break;
+ default:
+ seq_printf(f, "Illegal offset %d\n", (unsigned int) offset);
+ }
+
+ return 0;
+}
+
+/* Remove the /proc entries registered by bgpersonality_init_module().
+ * Safe to call from the init failure path: either entry may be NULL. */
+void bgpersonality_cleanup_module(void)
+{
+ if (personality_proc_entry) {
+ remove_proc_entry(personality_proc_entry->name, NULL);
+ }
+
+ if (personality_sh_proc_entry) {
+ remove_proc_entry(personality_sh_proc_entry->name, NULL);
+ }
+}
+
+
+
+/* seq_file iterator ops for /proc/personality.sh; each step of the
+ * iterator emits one BG_* shell-variable line (see bgpers_sh_seq_show). */
+static struct seq_operations bgpers_sh_seq_ops = {
+ .start = bgpers_sh_seq_start,
+ .next = bgpers_sh_seq_next,
+ .stop = bgpers_sh_seq_stop,
+ .show = bgpers_sh_seq_show
+};
+
+
+
+/* open() handler for /proc/personality.sh: attach the seq_file iterator. */
+static int bgpers_sh_proc_open(struct inode* inode,
+ struct file* f)
+{
+ return seq_open(f, &bgpers_sh_seq_ops);
+}
+
+
+/* file_operations for /proc/personality.sh; reads are served through the
+ * standard seq_file helpers. */
+static struct file_operations bgpers_sh_fops = {
+ .owner = THIS_MODULE,
+ .open = bgpers_sh_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+
+/* Create /proc/personality (read_proc style) and /proc/personality.sh
+ * (seq_file style).  On any failure both entries are torn down via
+ * bgpersonality_cleanup_module() and -ENOMEM is returned. */
+int bgpersonality_init_module(void)
+{
+ personality_proc_entry = create_proc_read_entry("personality", 0644, NULL,
+ bgpersonality_read, (void *) 0);
+ if (!personality_proc_entry)
+ goto out;
+
+ personality_sh_proc_entry = create_proc_entry("personality.sh", 0, NULL);
+ if (!personality_sh_proc_entry)
+ goto out;
+ else
+ personality_sh_proc_entry->proc_fops = &bgpers_sh_fops;
+
+ return 0;
+
+out:
+ bgpersonality_cleanup_module();
+
+ return -ENOMEM;
+}
+
+
+module_init(bgpersonality_init_module);
+module_exit(bgpersonality_cleanup_module);
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e868b5c50723d3..928d46ff72d926 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -231,7 +231,7 @@ config VIRT_CPU_ACCOUNTING
If in doubt, say Y here.
config SMP
- depends on PPC_STD_MMU || FSL_BOOKE
+ depends on PPC_STD_MMU || BOOKE
bool "Symmetric multi-processing support"
---help---
This enables support for systems with more than one CPU. If you have
@@ -259,9 +259,13 @@ config NR_CPUS
config NOT_COHERENT_CACHE
bool
depends on 4xx || 8xx || E200 || PPC_MPC512x
+ default n if BGP
default y
config CHECK_CACHE_COHERENCY
bool
+config L1_WRITETHROUGH
+ bool
+
endmenu
diff --git a/arch/powerpc/syslib/bgdd/Makefile b/arch/powerpc/syslib/bgdd/Makefile
new file mode 100644
index 00000000000000..4ee8cb1bae8231
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/Makefile
@@ -0,0 +1,11 @@
+#CFLAGS += -Wa,-m450
+
+EXTRA_CFLAGS := -D__LINUX_KERNEL__ -Wno-declaration-after-statement
+
+obj-$(CONFIG_BGP_DMA) += bgp_dma_spi.o
+
+
+bgp_dma_spi-y := bgp_dma_base.o
+bgp_dma_spi-y += spi/DMA_InjFifo.o
+bgp_dma_spi-y += spi/DMA_RecFifo.o
+bgp_dma_spi-y += spi/DMA_Descriptors.o
diff --git a/arch/powerpc/syslib/bgdd/bgp_dma_base.c b/arch/powerpc/syslib/bgdd/bgp_dma_base.c
new file mode 100644
index 00000000000000..5703368c524537
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/bgp_dma_base.c
@@ -0,0 +1,1284 @@
+/**********************************************************************
+ *
+ * Copyright (c) 2007, 2009 International Business Machines
+ * Chris Ward <tjcw@uk.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ **********************************************************************/
+
+
+/* ************************************************************************* */
+/* includes */
+/* ************************************************************************* */
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <linux/vmalloc.h>
+
+#include <linux/hugetlb.h>
+/* #include <asm/bluegene.h> */
+
+#include <asm/bgcns.h>
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR)
+#define TORNIC_TORUS_AFFINITY
+#endif
+
+/* int bgp_dma_irq ; */
+#if defined(TORNIC_TORUS_AFFINITY)
+void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ;
+enum {
+ k_TorusAffinityCPU = 2
+};
+#endif
+
+/* Debug trace; invoke with double parens: TRACE((KERN_INFO "fmt", ...)) */
+#define TRACE(x) printk x
+
+/* Log and propagate a nonzero status code; requires a local `ret` in scope
+ * at the call site. */
+#define CHECK_RET(x) if (x) { TRACE((KERN_INFO \
+ "bgpdma: Return due error at line %d\n",\
+ __LINE__)); \
+ return ret; }
+
+#undef CHECK_PARAM
+/* Argument sanity check: log and return -EINVAL when x is false. */
+#define CHECK_PARAM(x) if (!(x)) { printk( KERN_INFO \
+ "(E) bgpdma: Assertion failed in %s:%d\n", \
+ __FILE__,__LINE__); \
+ return -EINVAL; }
+#undef HPC_MODE
+/* #define HPC_MODE */
+
+
+/* ************************************************************************* */
+/* Include firmware */
+/* ************************************************************************* */
+
+/* ************************************************************************* */
+/* Defines and friends required by DMA SPI in kernel mode */
+/* ************************************************************************* */
+
+#include <spi/linux_kernel_spi.h>
+
+/* #include "bgp_bic_diagnosis.h" */
+/* ************************************************************************* */
+/* IOCTL commands */
+/* ************************************************************************* */
+
+/* size of mmap'ed IO memory */
+#define BGP_DMA_MMAP_SIZE (4096 * 4)
+/* */
+///* ************************************************************************* */
+///* network device structures */
+///* ************************************************************************* */
+/* */
+/* Global allocation state for DMA resources: updated by the
+ * Kernel_*Allocate/Free calls below and consulted by *QueryFree. */
+struct bgpdma_state_t
+{
+ uint32_t inj_counters[4]; /* for each group, a bit mask of which injection counter subgroups allocated */
+ /* bits 0 - 7 are valid, 8 subgroups of 8 counters/subgroup */
+ uint32_t rec_counters[4]; /* for each group, a bit mask of which reception counter subgroups allocated */
+ /* bits 0 - 7 are valid, 8 subgroups of 8 counters/subgroup */
+ uint32_t inj_fifos[4]; /* for each group, a bit mask of which injection fifos have been allocated */
+ /* bits 0 - 31 are valid */
+
+ uint32_t rec_fifo_set_map; /* if 1, _bgp_DMA_RecFifoSetMap has already been called */
+
+ uint32_t rec_fifo_init[2]; /* set bit to 1 if receive fifo has already been initialized, */
+ /* bits 0-31 of rec_fifo_init[0] for normal fifos */
+ /* bits 0-3 of rec_fifo_init[1] for header fifos */
+};
+
+/* max number of registered interrupt handlers */
+#define MAX_NUM_IRQ 4
+
+
+/* interrupt info structure: one registered DMA interrupt callback */
+struct dma_irq
+{
+ int irq; /* irq number for this group */
+ /* (fixed at module init time) */
+ Kernel_CommThreadHandler func; /* callback invoked by dmaIrqHandler() */
+ u32 arg1; /* first argument passed to func */
+};
+
+
+
+/* Per-device driver state: DMA physical base, resource allocation state,
+ * and the table of registered interrupt handlers. */
+struct bgpdma_dev_t
+{
+ unsigned long long pa_addr; /* physical address */
+ struct bgpdma_state_t state; /* dma resource state */
+ struct dma_irq irqInfo[ MAX_NUM_IRQ ]; /* dma interrupts */
+};
+/* */
+static struct bgpdma_dev_t bgpdma_dev;
+
+/* ************************************************************************* */
+/* Linux module header */
+/* ************************************************************************* */
+
+MODULE_DESCRIPTION("BG/P DMA driver");
+MODULE_LICENSE("GPL");
+
+#define BGP_DMA_NAME "bgpdma"
+
+/* Threshold crossed irq number for rec fifo groups */
+#define DMA_RECFIFO_THRESHOLD_IRQ(group) ((_BGP_IC_DMA_NFT_G2_HIER_POS<<5)|(28+group))
+#define DMA_RECFIFO_THRESHOLD_IRQ_GINT(group) (28+group)
+
+/* Threshold crossed irq number for rec fifo groups */
+#define TORUS_RECFIFO_WATERMARK_IRQ(fifo) ((_BGP_IC_DMA_NFT_G2_HIER_POS<<5)|(8+fifo))
+#define TORUS_RECFIFO_WATERMARK_IRQ_GINT(fifo) (8+fifo)
+
+/* ************************************************************************* */
+/* module initialization/cleanup */
+/* ************************************************************************* */
+
+static int __init
+ bgpdma_module_init (void);
+static void __exit
+ bgpdma_module_cleanup (void);
+
+extern BGCNS_Descriptor bgcnsd;
+
+module_init(bgpdma_module_init);
+module_exit(bgpdma_module_cleanup);
+
+/* ************************************************************************* */
+/* BG/P DMA initialization */
+/* ************************************************************************* */
+
+/* dma physical address */
+#define _BGP_UA_DMA (0x6)
+#define _BGP_PA_DMA (0x00000000)
+
+/* virtual kernel based address of DMA */
+void * bgpdma_kaddr;
+EXPORT_SYMBOL(bgpdma_kaddr);
+
+
+/* check if DMA is mapped by the kernel */
+#define CHECK_DMA_ACCESS if ( ! bgpdma_kaddr ) { printk( KERN_INFO "(E) DMA is not mapped\n"); return -ENODEV; }
+
+
+
+/* dma interrupt handler: dispatch to the Kernel_CommThreadHandler that was
+ * registered for this irq; arg points at the matching dma_irq slot. */
+/* static unsigned int dmaHandlerCount ; */
+irqreturn_t dmaIrqHandler(int irq, void * arg)
+{
+ struct dma_irq * irqInfo = ( struct dma_irq * )arg;
+
+
+/* dmaHandlerCount += 1 ; */
+/* if( irq != 92 || dmaHandlerCount < 20 ) */
+/* { */
+/* printk( KERN_INFO "(I) bgpdma: rec fifo irq dmaIrqHandler called irq:%d arg:%08x\n", */
+/* irq, (int)arg); */
+/* // show_bic_regs() ; */
+/* } */
+ (*irqInfo->func)(irqInfo->arg1,0,0,0);
+ return IRQ_HANDLED;
+}
+
+/* irqreturn_t watermarkIrqHandler(int irq, void * arg) */
+/* { */
+/* struct dma_irq * irqInfo = ( struct dma_irq * )arg; */
+/* */
+/* */
+/* dmaHandlerCount += 1 ; */
+/* if( irq != 92 || dmaHandlerCount < 20 ) */
+/* { */
+/* printk( KERN_INFO "(I) bgpdma: rec fifo irq watermarkIrqHandler called irq:%d arg:%08x\n", */
+/* irq, (int)arg); */
+/* // show_bic_regs() ; */
+/* } */
+/* (*irqInfo->func)(irqInfo->arg1,0,0,0); */
+/* return IRQ_HANDLED; */
+/* } */
+
+/* Placeholder interrupt handler: logs the irq and argument, then claims
+ * the interrupt so it is not reported as unhandled. */
+irqreturn_t dummyIrqHandler(int irq, void * arg)
+{
+ printk( KERN_INFO "(I) bgpdma: dummy irq handler called irq:%d arg:%08x\n",
+ irq, (int)arg);
+ return IRQ_HANDLED;
+}
+
+
+/* Module init: map the DMA unit into kernel virtual space and register the
+ * mapping with CNS.  On I/O nodes the DMA is not mapped; bgpdma_kaddr stays
+ * NULL and every Kernel_* entry point bails out via CHECK_DMA_ACCESS. */
+static int /*__init*/ bgpdma_module_init (void)
+{
+ TRACE((
+ KERN_INFO "bgpdma: module initialization\n"
+ ));
+
+ /* 36-bit physical address of the DMA unit (upper nibble from _BGP_UA_DMA) */
+ bgpdma_dev.pa_addr = ((unsigned long long)_BGP_UA_DMA << 32) | _BGP_PA_DMA;
+
+ /* map DMA into kernel space */
+
+ if ( bgcnsd.services->isIONode() )
+ {
+ TRACE((
+ KERN_INFO "(I) DMA is not mapped on IO node\n"
+ ));
+ bgpdma_kaddr = NULL;
+ return 0;
+ }
+
+ bgpdma_kaddr = ioremap( bgpdma_dev.pa_addr, BGP_DMA_MMAP_SIZE );
+
+ if ( bgpdma_kaddr == NULL )
+ {
+ /* the mapping call is ioremap(); the old message blamed vmap() */
+ printk( KERN_INFO "(E) bgpdma: ioremap() failed\n" );
+ return -ENOMEM;
+ }
+
+ /* Let bgcnsd know about the new address of the dma */
+ unsigned long flags;
+ local_irq_save(flags);
+ bgcnsd.services->mapDevice(BGCNS_DMA, bgpdma_kaddr );
+ local_irq_restore(flags);
+
+
+ TRACE((
+ KERN_INFO "bgpdma: module initialization finished, dma kaddr:%08x\n",
+ (unsigned)bgpdma_kaddr));
+
+ return 0;
+}
+
+/* ************************************************************************* */
+/* BG/P net module cleanup */
+/* ************************************************************************* */
+
+/* Module exit: undo the ioremap() done in bgpdma_module_init().
+ * On I/O nodes no mapping was ever made (bgpdma_kaddr is NULL), so the
+ * unmap must be skipped — iounmap(NULL) is not guaranteed to be safe. */
+static void __exit
+ bgpdma_module_cleanup()
+{
+
+ /* release kernel mapping of dma, if one exists */
+ if ( bgpdma_kaddr )
+ iounmap ( bgpdma_kaddr );
+}
+
+
+
+/*
+ * Query which counter subgroups of a group are free.
+ * type: 0 = injection, 1 = reception; grp: counter group 0..3.
+ * On return *num_subgrps holds the count and subgrps[] the free subgroup ids.
+ * Returns 0, -EINVAL on bad arguments, -ENODEV if the DMA is not mapped.
+ */
+u32 Kernel_CounterGroupQueryFree( u32 type,
+ u32 grp,
+ u32 * num_subgrps,
+ u32 * subgrps )
+{
+ CHECK_DMA_ACCESS;
+
+ int ret = 0;
+ uint32_t counters;
+ int i;
+
+ /* grp and type are unsigned, so only the upper bounds need checking */
+ if ( grp >= 4 || type > 1 ) return -EINVAL;
+ if ( num_subgrps == NULL || subgrps == NULL ) return -EINVAL;
+
+ if ( type == 0 )
+ counters = bgpdma_dev.state.inj_counters[grp];
+ else
+ counters = bgpdma_dev.state.rec_counters[grp];
+
+ (*num_subgrps) = 0;
+ for(i=0; i < DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP; i++ )
+ {
+ if ( ( counters & _BN(i) ) == 0)
+ {
+ subgrps[*num_subgrps] = i;
+ (*num_subgrps)++;
+ }
+ }
+
+ /* bug fix: previously printed the pointer value, not the free count */
+ TRACE((
+ KERN_INFO "Allocated counters:%08x num_free:%d\n",counters,(int)(*num_subgrps)));
+
+ return ret;
+}
+EXPORT_SYMBOL(Kernel_CounterGroupQueryFree);
+
+
+/*
+ * Allocate counter subgroups from one counter group.
+ * type: DMA_Type_Injection or reception; grp: counter group 0..3;
+ * subgrps[num_subgrps]: subgroup ids to claim (each subgroup holds
+ * DMA_NUM_COUNTERS_PER_SUBGROUP counters).
+ * cg: caller-provided DMA_CounterGroup_t, filled in with per-counter HW
+ * pointers; every counter is zeroed, disabled and its hit-zero cleared.
+ * target/handler/handler_parm/interruptGroup are accepted but unused here.
+ * Returns 0, -EINVAL on bad arguments, -EBUSY if a subgroup is taken.
+ */
+u32 Kernel_CounterGroupAllocate( u32 type,
+ u32 grp,
+ u32 num_subgrps,
+ u32 * subgrps,
+ u32 target, /* not used */
+ u32 handler, /* not used */
+ u32 * handler_parm, /* not used */
+ u32 interruptGroup, /* not used */
+ u32 * cg )
+{
+ CHECK_DMA_ACCESS;
+
+ unsigned i,j;
+ u32 *counters;
+ u32 c_bits;
+ int min_id, max_id, word_id, bit_id, global_subgrp;
+ DMA_CounterGroup_t * cg_ptr = (DMA_CounterGroup_t *)cg;
+ if ( type > 1 ) return -EINVAL;
+ if ( grp >= 4 ) return -EINVAL;
+ if ( subgrps == NULL ) return -EINVAL;
+ if ( num_subgrps <= 0 ) return -EINVAL;
+ if ( num_subgrps > DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) return -EINVAL;
+ if ( cg_ptr == NULL ) return -EINVAL;
+
+ if ( type == DMA_Type_Injection )
+ counters = &bgpdma_dev.state.inj_counters[grp];
+ else
+ counters = &bgpdma_dev.state.rec_counters[grp];
+
+ /* first pass: validate every requested subgroup and build the bit mask,
+ * so nothing is modified if any request fails */
+ c_bits = 0;
+ for(i=0;i< num_subgrps;i++)
+ {
+ if ( subgrps[i] < 0 ) return -EINVAL;
+ if (subgrps[i] >= DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) return -EINVAL;
+ if ( *counters & _BN(subgrps[i]) )
+ {
+ printk( KERN_WARNING
+ "bgpdma: tried to allocate busy counters grp:%d subgrps:%d\n",
+ grp, subgrps[i]);
+ return -EBUSY;
+ }
+ c_bits |= _BN(subgrps[i]);
+ }
+
+ memset( cg_ptr, 0, sizeof(DMA_CounterGroup_t));
+ cg_ptr->type = type;
+ cg_ptr->group_id = grp;
+
+ if ( type == DMA_Type_Injection )
+ cg_ptr->status_ptr = (DMA_CounterStatus_t *) _BGP_VA_iDMA_COUNTER_ENABLED(grp,0);
+ else
+ cg_ptr->status_ptr = (DMA_CounterStatus_t *) _BGP_VA_rDMA_COUNTER_ENABLED(grp,0);
+
+ /* second pass: record permissions and initialize each counter's HW state */
+ for(i=0;i< num_subgrps;i++)
+ {
+ min_id = subgrps[i] * DMA_NUM_COUNTERS_PER_SUBGROUP;
+ max_id = min_id + DMA_NUM_COUNTERS_PER_SUBGROUP;
+ global_subgrp = (grp * DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ) + subgrps[i];
+
+ cg_ptr->grp_permissions |= _BN( global_subgrp );
+ for ( j = min_id; j < max_id; j++ )
+ {
+ word_id = DMA_COUNTER_GROUP_WORD_ID(j);
+ bit_id = DMA_COUNTER_GROUP_WORD_BIT_ID(j);
+ cg_ptr->permissions[ word_id ] |= _BN(bit_id);
+
+ if ( type == DMA_Type_Injection )
+ {
+ cg_ptr->counter[j].counter_hw_ptr =
+ ( DMA_CounterHw_t *) _BGP_VA_iDMA_COUNTER(grp,j);
+ DMA_CounterSetValueBaseHw(cg_ptr->counter[j].counter_hw_ptr, 0, 0);
+ /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->counter); */
+ /* CHECK_RET(ret); */
+ /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_base); */
+ /* CHECK_RET(ret); */
+
+ TRACE((
+ KERN_INFO "DMA Injection cntr allocated: %d(%08x)\n",
+ j,(unsigned)cg_ptr->counter[j].counter_hw_ptr));
+ }
+ else
+ {
+ cg_ptr->counter[j].counter_hw_ptr =
+ ( DMA_CounterHw_t *) _BGP_VA_rDMA_COUNTER(grp,j);
+ DMA_CounterSetValueBaseMaxHw(cg_ptr->counter[j].counter_hw_ptr, 0, 0, 0);
+ /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->counter); */
+ /* CHECK_RET(ret); */
+ /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_base); */
+ /* CHECK_RET(ret); */
+ /* ret = put_user( 0, &cg_ptr->counter[j].counter_hw_ptr->pa_max); */
+ /* CHECK_RET(ret); */
+
+ TRACE((
+ KERN_INFO "DMA Reception cntr allocated: %d(%08x)\n",
+ j,(unsigned)cg_ptr->counter[j].counter_hw_ptr));
+ }
+ /* disable the counter, clear it's hit-zero */
+ /* DMA_CounterSetDisableById ( cg_ptr,j ); */
+ cg_ptr->status_ptr->disable[word_id] = _BN(bit_id);
+ /* ret = put_user( _BN(bit_id), &cg_ptr->status_ptr->disable[word_id] ); */
+ /* CHECK_RET(ret); */
+ /* DMA_CounterClearHitZeroById( &cg,j ); */
+ cg_ptr->status_ptr->clear_hit_zero[word_id] = _BN(bit_id);
+ /* ret = put_user( _BN(bit_id), &cg_ptr->status_ptr->clear_hit_zero[word_id] ); */
+ /* CHECK_RET(ret); */
+ }
+ }
+
+ /* ensure all HW writes are visible before publishing the allocation */
+ _bgp_msync();
+
+ /* mark counters allocated in the global state */
+ *counters |= c_bits;
+
+ TRACE((
+ KERN_INFO "Allocated counters:%08x\n",*counters));
+
+ return 0;
+}
+EXPORT_SYMBOL(Kernel_CounterGroupAllocate);
+
+
+/*
+ * Query free injection fifos of a group.
+ * grp: injection fifo group; on return *num_fifos holds the count of free
+ * fifos and fifo_ids[] their ids (a fifo is free when its bit is clear in
+ * the global inj_fifos allocation mask).
+ * Returns 0, -EINVAL on bad arguments, -ENODEV if the DMA is not mapped.
+ */
+u32 Kernel_InjFifoGroupQueryFree( u32 grp, u32 * num_fifos, u32 * fifo_ids )
+{
+ CHECK_DMA_ACCESS;
+
+ int ret = 0;
+ u32 state;
+ int i;
+
+ if ( grp >= DMA_NUM_INJ_FIFO_GROUPS ) return -EINVAL;
+ if ( num_fifos == NULL || fifo_ids == NULL ) return -EINVAL;
+
+ state = bgpdma_dev.state.inj_fifos[grp];
+
+ (*num_fifos) = 0;
+ for(i=0;i< DMA_NUM_INJ_FIFOS_PER_GROUP;i++)
+ {
+ if ( ( state & _BN(i) ) == 0 )
+ {
+ fifo_ids[(*num_fifos)] = i;
+ (*num_fifos)++;
+ TRACE((
+ KERN_INFO "Free inj fifo: %d\n",i));
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(Kernel_InjFifoGroupQueryFree);
+
+
+/*
+ * Allocate injection fifos from a group.
+ * grp: fifo group; ids[num_fifos]: fifo ids to claim; per-fifo attributes:
+ * pri[] (0/1 priority), loc[] (1 = local copy), map[] (torus injection map,
+ * must be 0 iff loc[i] == 1).  fg: caller-provided DMA_InjFifoGroup_t that
+ * is filled in; the fifos are disabled, deactivated and their HW registers
+ * zeroed — Kernel_InjFifoInitById() activates them later.
+ * Returns 0, -EINVAL on bad arguments, -EBUSY if a fifo is already taken.
+ */
+u32 Kernel_InjFifoGroupAllocate( u32 grp,
+ u32 num_fifos,
+ u32 * ids,
+ u16 * pri,
+ u16 * loc,
+ u8 * map,
+ u32 * fg )
+{
+ CHECK_DMA_ACCESS;
+
+ /* MUST be called when the DMA is inactive, prior to any DMA activity */
+ int i;
+ u32 f_bits =0;
+ u32 p_bits =0;
+ u32 l_bits =0;
+ DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg;
+
+ if ( fg_ptr == NULL ) return -EINVAL;
+ if ( grp < 0 || grp >= DMA_NUM_FIFO_GROUPS ) return -EINVAL;
+ if ( num_fifos <= 0 || num_fifos > DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL;
+ /* bug fix: loc[] is dereferenced below, so it must be NULL-checked too */
+ if ( ids == NULL || pri == NULL || loc == NULL || map == NULL ) return -EINVAL;
+
+ f_bits = 0; /* holds a bit vector of all fifos used in this allocation */
+ for ( i = 0; i < num_fifos; i++ )
+ {
+ if ( ids[i] >= DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL;
+ if ( pri[i] > 1 || loc[i] > 1 ) return -EINVAL;
+ if ( loc[i] == 0 && map[i] == 0 ) return -EINVAL;
+ if ( loc[i] == 1 && map[i] != 0 ) return -EINVAL;
+
+ if ( bgpdma_dev.state.inj_fifos[grp] & _BN(ids[i]) )
+ {
+ printk( KERN_WARNING
+ "bgpdma: tried to allocate busy inj fifos grp:%d fifo_id:%d\n",
+ grp, ids[i]);
+ return -EBUSY;
+ }
+
+ f_bits |= _BN(ids[i]);
+ if ( loc[i] == 1 ) l_bits |= _BN(i);
+ if ( pri[i] == 1 ) p_bits |= _BN(i);
+ }
+
+
+ memset( fg_ptr, 0, sizeof(DMA_InjFifoGroup_t));
+ fg_ptr->status_ptr = (DMA_InjFifoStatus_t *) _BGP_VA_iDMA_NOT_EMPTY(grp);
+ fg_ptr->group_id = grp;
+ fg_ptr->permissions |= f_bits;
+
+ /* Disable interrupts and the injection FIFOs */
+ unsigned long flags;
+ local_irq_save(flags);
+ bgcnsd.services->
+ setDmaFifoControls( BGCNS_Disable,BGCNS_InjectionFifoInterrupt, grp,f_bits,NULL );
+ bgcnsd.services->
+ setDmaFifoControls( BGCNS_Disable,BGCNS_InjectionFifo, grp,f_bits,NULL );
+ local_irq_restore(flags);
+
+ /* deactivate all these fifos */
+ fg_ptr->status_ptr->deactivate = f_bits;
+ /* ret = put_user( f_bits, &fg.status_ptr->deactivate ); */
+ /* CHECK_RET(ret); */
+
+ _bgp_mbar(); /* make sure write is in the DMA */
+
+ local_irq_save(flags);
+ bgcnsd.services->setDmaInjectionMap( grp, (unsigned*)ids, map, num_fifos );
+ local_irq_restore(flags);
+
+ for ( i=0;i< num_fifos; i++)
+ {
+ fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr =
+ ( DMA_FifoHW_t *) _BGP_VA_iDMA_START(grp, ids[i]);
+ fg_ptr->fifos[ids[i]].fifo_id = ids[i];
+ fg_ptr->fifos[ids[i]].desc_count = 0;
+ fg_ptr->fifos[ids[i]].occupiedSize = 0;
+ fg_ptr->fifos[ids[i]].priority = pri[i] ;
+ fg_ptr->fifos[ids[i]].local = loc[i];
+ fg_ptr->fifos[ids[i]].ts_inj_map = map[i];
+
+ /* write 0's to the hw fifo */
+ fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_start = 0;
+ /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_start ); */
+ /* CHECK_RET(ret); */
+ fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_head = 0;
+ /* ret = put_user ( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_head ); */
+ /* CHECK_RET(ret); */
+ fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_tail = 0;
+ /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_tail ); */
+ /* CHECK_RET(ret); */
+ fg_ptr->fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_end = 0;
+ /* ret = put_user( 0, &fg.fifos[ids[i]].dma_fifo.fifo_hw_ptr->pa_end ); */
+ /* CHECK_RET(ret); */
+
+/* TRACE((KERN_INFO "Allocate inj fifo: %d",ids[i])); */
+ }
+
+ /* clear the threshold crossed */
+ _bgp_mbar(); /* no previous write will pass this one */
+ fg_ptr->status_ptr->clear_threshold_crossed = f_bits;
+ /* ret = put_user( f_bits, &fg.status_ptr->clear_threshold_crossed ); */
+ /* CHECK_RET(ret); */
+
+ local_irq_save(flags);
+ /* set the local copy bits */
+ bgcnsd.services->setDmaLocalCopies(BGCNS_Enable, grp, l_bits);
+ /* set the priority bits */
+ bgcnsd.services->setDmaPriority(BGCNS_Enable, grp, p_bits);
+
+ /* Enable interrupts for these fifos. */
+ /* NOTE: enablement of the injection FIFO will take place during FIFO init. */
+ /* _bgp_cns()->setDmaFifoControls( BGCNS_Enable, BGCNS_InjectionFifoInterrupt, grp, f_ids, NULL ); */
+ local_irq_restore(flags);
+
+ /* mark fifos allocated in the global state */
+ bgpdma_dev.state.inj_fifos[grp] |= f_bits;
+
+ return 0;
+}
+EXPORT_SYMBOL(Kernel_InjFifoGroupAllocate);
+
+/*
+ * General fifo init: program a DMA fifo's HW registers (start/head/tail/end)
+ * from kernel virtual addresses and fill in the software shadow in f_ptr.
+ * va_start/va_end must be 32-byte aligned, va_head 16-byte aligned; the
+ * hardware registers hold physical addresses shifted right by 4 (16B quads).
+ * NOTE(review): assumes [va_start, va_end) is physically contiguous — only
+ * va_start is translated with virt_to_phys(); confirm for vmalloc'ed buffers.
+ * Returns 0 or -EINVAL on bad alignment/arguments.
+ */
+static inline int FifoInit( DMA_Fifo_t * f_ptr,
+ void * va_start,
+ void * va_head,
+ void * va_end )
+{
+ int ret = 0;
+ uint32_t pa_start, pa_head, pa_end;
+ unsigned bytes;
+
+ TRACE((
+ KERN_INFO "FifoInit va_start:%08x va_head:%08x va_end:%08x\n",
+ (u32)va_start,(u32)va_head,(u32)va_end));
+
+ if ( f_ptr == NULL ) return -EINVAL;
+ if ( f_ptr->fifo_hw_ptr == NULL ) return -EINVAL;
+ if ( ((uint32_t)va_start & 0x1F) != 0 ) return -EINVAL;
+ if ( ((uint32_t)va_end & 0x1F) != 0 ) return -EINVAL;
+ if ( ((uint32_t)va_head & 0xF ) != 0 ) return -EINVAL;
+
+ bytes = (uint32_t)va_end - (uint32_t)va_start;
+
+ /* translate start address ( and check if the region is contiguous) */
+ pa_start = virt_to_phys ( va_start );
+/* TRACE((KERN_INFO "bgpdma: FifoInit() va_start:%08x pa_start:%08x shifted:%08x", */
+/* (u32)va_start, pa_start, pa_start>>4 )); */
+ pa_start >>= 4; /* we need 16-byte aligned address */
+
+ /* ret = VaTo4bitShiftedPa( va_start, bytes, &pa_start ); */
+ /* CHECK_RET(ret); */
+
+ /* physical region is contiguous, we can compute pa_end and pa_head */
+ pa_end = pa_start + ( bytes >> 4 );
+ pa_head = pa_start + ( ((uint32_t)va_head - (uint32_t)va_start ) >> 4 );
+
+ /* Write the start, end , head and tail(= head) */
+ f_ptr->fifo_hw_ptr->pa_start = pa_start;
+ /* ret = put_user ( pa_start, &f_ptr->fifo_hw_ptr->pa_start ); */
+ /* CHECK_RET(ret); */
+ f_ptr->fifo_hw_ptr->pa_head = pa_head;
+ /* ret = put_user( pa_head, &f_ptr->fifo_hw_ptr->pa_head ); */
+ /* CHECK_RET(ret); */
+ f_ptr->fifo_hw_ptr->pa_tail = pa_head;
+ /* ret = put_user( pa_head, &f_ptr->fifo_hw_ptr->pa_tail ); */
+ /* CHECK_RET(ret); */
+ f_ptr->fifo_hw_ptr->pa_end = pa_end;
+ /* ret = put_user( pa_end, &f_ptr->fifo_hw_ptr->pa_end ); */
+ /* CHECK_RET(ret); */
+
+ _bgp_mbar();
+
+ /* Save the shadows in the structure */
+ f_ptr->pa_start = pa_start;
+ f_ptr->va_start = va_start;
+ f_ptr->va_end = va_end;
+ f_ptr->va_head = va_head;
+ f_ptr->va_tail = va_head;
+
+ /* Compute the free space */
+ f_ptr->fifo_size = bytes >> 4; /* Number of 16B quads */
+ f_ptr->free_space = f_ptr->fifo_size;
+
+ return ret;
+}
+
+
+/*
+ * Initialize one injection fifo of a previously allocated group.
+ * The fifo is disabled and deactivated, its HW registers and shadow are
+ * set from va_start/va_head/va_end via FifoInit(), counters are reset,
+ * and finally the fifo is re-enabled and activated.
+ * The head==tail readback at the end doubles as a barrier that proves all
+ * prior writes reached the DMA unit.
+ * Returns 0; -EINVAL on bad args/alignment; -EBUSY if the fifo is not in
+ * this group's permissions; -EIO if the readback check fails.
+ */
+u32 Kernel_InjFifoInitById( u32 * fg,
+ int fifo_id,
+ u32 * va_start,
+ u32 * va_head,
+ u32 * va_end )
+{
+ CHECK_DMA_ACCESS;
+
+ int ret = 0;
+ int grp;
+ uint32_t x_phead, x_vstart, x_pstart, x_vtail;
+ DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg;
+
+ if ( fg_ptr == NULL ) return -EINVAL;
+ if ( fifo_id < 0 || fifo_id >= DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL;
+ if ( va_start >= va_end || va_start > va_head || va_head > va_end ) return -EINVAL;
+ if ( (u32)va_head+DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES > (u32)va_end ) return -EINVAL;
+ if ( (u32)va_end - (u32)va_start < DMA_MIN_INJ_FIFO_SIZE_IN_BYTES ) return -EINVAL;
+ if ( ((u32)va_start & 0x1F) != 0 ) return -EINVAL;
+ if ( ((u32)va_end & 0x1F) != 0 ) return -EINVAL;
+ if ( ((u32)va_head & 0xF) != 0 ) return -EINVAL;
+
+ if (( fg_ptr->permissions & _BN(fifo_id)) == 0 ) return -EBUSY;
+
+ grp = fg_ptr->group_id;
+
+
+ /* Disable the injection FIFO and its interrupt: */
+ unsigned long flags;
+ local_irq_save(flags);
+ bgcnsd.services->
+ setDmaFifoControls(BGCNS_Disable, BGCNS_InjectionFifo, grp, _BN(fifo_id), NULL);
+ bgcnsd.services->
+ setDmaFifoControls(BGCNS_Disable, BGCNS_InjectionFifoInterrupt, grp, _BN(fifo_id), NULL );
+ local_irq_restore(flags);
+
+
+ /* Deactivate the fifo */
+ fg_ptr->status_ptr->deactivate = _BN(fifo_id);
+ /* ret = put_user ( _BN(fifo_id), &fg.status_ptr->deactivate ); */
+ /* CHECK_RET(ret); */
+
+ /* Initialize the fifo */
+ ret = FifoInit( &fg_ptr->fifos[fifo_id].dma_fifo, va_start, va_head, va_end );
+ CHECK_RET(ret);
+
+ /* Initialize the descriptor count and occupied size */
+ fg_ptr->fifos[fifo_id].desc_count = 0;
+ fg_ptr->fifos[fifo_id].occupiedSize = 0;
+
+ /* clear the threshold crossed */
+ fg_ptr->status_ptr->clear_threshold_crossed = _BN(fifo_id);
+ /* ret = put_user( _BN(fifo_id), &fg.status_ptr->clear_threshold_crossed ); */
+ /* CHECK_RET(ret); */
+
+ /* read back something from the dma to ensure all writes have occurred */
+ /* head should equal tail */
+ x_phead = fg_ptr->fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head;
+ /* ret = get_user( x_phead, &fg.fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head ); */
+ /* CHECK_RET(ret); */
+ x_vstart = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.va_start);
+ x_pstart = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.pa_start);
+ x_vtail = (uint32_t)(fg_ptr->fifos[fifo_id].dma_fifo.va_tail);
+ if ( x_vstart + ( (x_phead - x_pstart) << 4 ) != x_vtail ) return -EIO;
+
+
+
+ /* Enable the FIFO and its interrupt: */
+ local_irq_save(flags);
+ bgcnsd.services->
+ setDmaFifoControls(BGCNS_Enable, BGCNS_InjectionFifo, grp, _BN(fifo_id), NULL);
+ /* bgcnsd.services->setDmaFifoControls(BGCNS_Enable, BGCNS_InjectionFifoInterrupt, grp, _BN(fifo_id), NULL); */
+ local_irq_restore(flags);
+
+ /* Activate the fifo */
+ fg_ptr->status_ptr->activate = _BN(fifo_id);
+ /* ret = put_user( _BN(fifo_id), &fg.status_ptr->activate ); */
+ /* CHECK_RET(ret); */
+
+ return 0;
+}
+EXPORT_SYMBOL(Kernel_InjFifoInitById);
+
+
+/*
+ * Free injection fifos previously claimed with Kernel_InjFifoGroupAllocate().
+ * Clears the HW pointers, drops the group permissions, deactivates the
+ * fifos, and releases the ids in the global allocation state so they can
+ * be allocated again.
+ * Returns 0, -EINVAL on bad arguments, -EBUSY if a fifo was not allocated.
+ */
+uint32_t Kernel_InjFifoGroupFree(uint32_t grp,
+ uint32_t num_fifos,
+ uint32_t * fifo_ids,
+ uint32_t * fg)
+{
+ /* consistency fix: siblings guard status_ptr access the same way */
+ CHECK_DMA_ACCESS;
+
+ int ret = 0;
+ u32 f_bits =0;
+ int i;
+ DMA_InjFifoGroup_t * fg_ptr = (DMA_InjFifoGroup_t *)fg;
+
+ if ( fg_ptr == NULL ) return -EINVAL;
+ if ( grp >= DMA_NUM_FIFO_GROUPS ) return -EINVAL;
+ if ( num_fifos <= 0 || num_fifos > DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL;
+ if ( fifo_ids == NULL ) return -EINVAL;
+
+ f_bits = 0; /* holds a bit vector of all fifos used in this allocation */
+ for ( i = 0; i < num_fifos; i++ )
+ {
+ if ( fifo_ids[i] >= DMA_NUM_INJ_FIFOS_PER_GROUP ) return -EINVAL;
+
+ if ( ! (bgpdma_dev.state.inj_fifos[grp] & _BN(fifo_ids[i])) )
+ {
+ printk( KERN_WARNING
+ "bgpdma: tried to free a non-allocated inj fifo grp:%d fifo_id:%d\n",
+ grp, fifo_ids[i]);
+ return -EBUSY;
+ }
+
+ f_bits |= _BN(fifo_ids[i]);
+ }
+
+ for ( i=0;i< num_fifos; i++)
+ fg_ptr->fifos[fifo_ids[i]].dma_fifo.fifo_hw_ptr = NULL;
+
+ fg_ptr->permissions &= ~f_bits; /* clear (not XOR) — robust if a bit was unset */
+ fg_ptr->status_ptr->deactivate = f_bits;
+
+ /* bug fix: release the ids in the global state; without this the freed
+ * fifos could never be reallocated (Allocate would return -EBUSY) */
+ bgpdma_dev.state.inj_fifos[grp] &= ~f_bits;
+
+ return ret;
+}
+
+
+
+/*
+ * Set the reception fifos map.
+ * Validates the map, disables the reception (and header) FIFOs, and hands
+ * the mapping to CNS via setDmaReceptionMap().  May be called only once
+ * (guarded by rec_fifo_set_map); the fifos themselves are enabled later by
+ * fifo init, not here.
+ * Returns 0, -EINVAL on bad map, -EBUSY if already set, -ENODEV if the DMA
+ * is not mapped.
+ */
+int Kernel_RecFifoSetMap( u32 * map )
+{
+ CHECK_DMA_ACCESS;
+
+ int i, g;
+ DMA_RecFifoMap_t * map_ptr = (DMA_RecFifoMap_t *)map;
+
+ /* NEED TO PUT A LOCK AROUND THIS, Assume either the syscall mechanism does this */
+ /* or it has to be put here */
+
+ /* MUST BE CALLED ONCE, Prior to Any DMA activity */
+ /* Specifically, must be called after _bgp_DMA_Reset_Release */
+ /* and prior to any _BGP_rDMA_Fifo_Get_Fifo_Group calls */
+
+ if ( map_ptr == NULL ) return -EINVAL;
+ if ( map_ptr->save_headers > 1 ) return -EINVAL;
+
+ for (i=0; i< DMA_NUM_NORMAL_REC_FIFOS; i++)
+ if ( ( map_ptr->fifo_types[i] < 0 ) || ( map_ptr->fifo_types[i] > 1)) return -EINVAL;
+
+ /* rec fifo map can be set only once */
+ if ( bgpdma_dev.state.rec_fifo_set_map != 0 ) return -EBUSY;
+
+ if ( map_ptr->save_headers == 1)
+ for (i=0; i< DMA_NUM_HEADER_REC_FIFOS; i++)
+ if ( ( map_ptr->hdr_fifo_types[i] <0 ) || ( map_ptr->hdr_fifo_types[i] > 1 ))
+ return -EINVAL;
+
+ for (g=0; g< DMA_NUM_REC_FIFO_GROUPS;g++)
+ for (i=0; i< DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP; i++)
+ if ( map_ptr->ts_rec_map[g][i] >= DMA_NUM_NORMAL_REC_FIFOS)
+ return -EINVAL;
+
+ TRACE((
+ KERN_INFO "bgpdma: Kernel_RecFifoSetMap() disabling reception FIFO interrupts\n"));
+
+ unsigned long flags;
+ local_irq_save(flags);
+ /* Disable the reception FIFOs */
+ bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionFifo, 0 /* group not used */, 0xFFFFFFFF, NULL );
+ bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifo, BGCNS_DMA_ALL_GROUPS, 0 /* mask not used */, NULL );
+
+ /* Set the map: */
+ bgcnsd.services->setDmaReceptionMap(map_ptr->ts_rec_map,
+ map_ptr->fifo_types,
+ map_ptr->save_headers ? map_ptr->hdr_fifo_types : NULL,
+ map_ptr->threshold );
+
+ local_irq_restore(flags);
+
+ /* Don't enable the fifos here, the fifo init will do that */
+ bgpdma_dev.state.rec_fifo_set_map = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(Kernel_RecFifoSetMap);
+
+
+/*
+ * Get the reception fifos map.
+ * Zeroes the caller's DMA_RecFifoMap_t and fills it from CNS via
+ * getDmaReceptionMap() (done with interrupts off around the CNS call).
+ * Returns 0, -EINVAL on NULL map, -ENODEV if the DMA is not mapped, or
+ * the CNS status via CHECK_RET.
+ */
+int Kernel_RecFifoGetMap( u32 * map )
+{
+ CHECK_DMA_ACCESS;
+
+ int ret;
+ DMA_RecFifoMap_t * map_ptr = (DMA_RecFifoMap_t *)map;
+
+ if ( map_ptr == NULL ) return -EINVAL;
+
+ memset( map_ptr, 0, sizeof(DMA_RecFifoMap_t) );
+
+ unsigned long flags;
+ local_irq_save(flags);
+
+ ret = bgcnsd.services->getDmaReceptionMap( map_ptr->ts_rec_map,
+ map_ptr->fifo_types,
+ &(map_ptr->save_headers),
+ map_ptr->hdr_fifo_types,
+ map_ptr->threshold);
+
+ local_irq_restore(flags);
+
+ CHECK_RET(ret);
+
+ return 0;
+}
+EXPORT_SYMBOL(Kernel_RecFifoGetMap);
+
+/*
+ * Kernel_RecFifoGetFifoGroup - initialize a reception fifo group structure.
+ *
+ * Reads the current reception fifo map from CNS and fills in the caller's
+ * DMA_RecFifoGroup_t for group 'grp': group id, status mask, status
+ * register pointer, and one entry for each normal fifo that the map
+ * actually routes packets into (plus the header fifo when headers are
+ * being saved).  Each fifo's hardware start/head/tail/end registers are
+ * zeroed, leaving the fifo disabled until Kernel_RecFifoInitById() runs.
+ * Finally the reception fifo interrupts for the fifos used by this group
+ * are enabled.
+ *
+ * The target, handler, handler_parm and interruptGroup parameters are
+ * accepted for interface compatibility only; they are not used here.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, -EIO if the
+ * threshold-crossed status could not be cleared, or a CNS error code.
+ */
+int  Kernel_RecFifoGetFifoGroup( u32  * fg,
+                                 int    grp,                /* group number */
+                                 int    target,             /* not used */
+                                 void * normal_handler,     /* not used */
+                                 void * normal_handler_parm,/* not used */
+                                 void * header_handler,     /* not used */
+                                 void * header_handler_parm,/* not used */
+                                 void * interruptGroup )    /* not used */
+{
+    CHECK_DMA_ACCESS;
+
+    int ret;
+    DMA_RecFifoMap_t map;
+
+    uint32_t used_fifos;
+    int g,i,j,min_id,max_id,idx;
+    uint32_t x;
+    DMA_RecFifoGroup_t * fg_ptr = (DMA_RecFifoGroup_t *)fg;
+
+    if ( fg_ptr == NULL ) return -EINVAL;
+    /*
+     * Valid group ids are 0 .. DMA_NUM_REC_FIFO_GROUPS-1.  The previous
+     * test used '>' which let grp == DMA_NUM_REC_FIFO_GROUPS through:
+     * the switch below would then leave fg_ptr->mask at 0 and the
+     * min_id/max_id computation would reference fifos past the last
+     * group.
+     */
+    if ( grp < 0 || grp >= DMA_NUM_REC_FIFO_GROUPS ) return -EINVAL;
+
+    memset( fg_ptr, 0, sizeof(DMA_RecFifoGroup_t) );
+
+    /* Get the current reception map from CNS (interrupts off) */
+    unsigned long flags;
+    local_irq_save(flags);
+    ret = bgcnsd.services->getDmaReceptionMap( map.ts_rec_map,
+                                               map.fifo_types,
+                                               &(map.save_headers),
+                                               map.hdr_fifo_types,
+                                               map.threshold);
+    local_irq_restore(flags);
+
+    CHECK_RET(ret);
+
+    /* Set the status mask: one byte of the status word per group */
+    fg_ptr->group_id = grp;
+    switch(grp)
+    {
+    case 0:  fg_ptr->mask = 0xFF000000; break;
+    case 1:  fg_ptr->mask = 0x00FF0000; break;
+    case 2:  fg_ptr->mask = 0x0000FF00; break;
+    case 3:  fg_ptr->mask = 0x000000FF; break;
+    }
+
+    /* Set the status pointer */
+    fg_ptr->status_ptr = ( DMA_RecFifoStatus_t *) _BGP_VA_rDMA_NOT_EMPTY(grp,0);
+
+    /* Global ids of the normal fifos belonging to this group */
+    min_id = (grp*DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP);
+    max_id = min_id +DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1;
+
+    /* A fifo is "used" if any map entry routes packets into it */
+    used_fifos = 0;
+    for (g=0;g< DMA_NUM_REC_FIFO_GROUPS;g++)
+        for(i=0;i<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;i++)
+            if ( ( map.ts_rec_map[g][i] >= min_id ) && (map.ts_rec_map[g][i] <= max_id) )
+                used_fifos |= _BN(map.ts_rec_map[g][i]);
+
+    idx = 0;
+    for(j= 0;j<DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP;j++)
+    {
+        i = min_id + j;
+        if ( ( _BN(i) & used_fifos) != 0 )
+        {
+            fg_ptr->fifos[idx].type           = map.fifo_types[i];
+            fg_ptr->fifos[idx].global_fifo_id = i;
+            fg_ptr->fifos[idx].num_packets_processed_since_moving_fifo_head = 0;
+            fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr = ( DMA_FifoHW_t *) _BGP_VA_rDMA_START(grp,j);
+            /* Zero start/head/tail/end so this fifo stays disabled */
+            fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_start = 0;
+            fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_head  = 0;
+            fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_tail  = 0;
+            fg_ptr->fifos[idx].dma_fifo.fifo_hw_ptr->pa_end   = 0;
+
+            idx++;
+        }
+    } /* j loop */
+
+    /* Header fifo, if headers are being saved */
+    if ( map.save_headers == 1 )
+    {
+        fg_ptr->num_hdr_fifos = 1;
+        fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].type           = map.hdr_fifo_types[grp];
+        fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].global_fifo_id = DMA_NUM_NORMAL_REC_FIFOS+grp;
+        fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].num_packets_processed_since_moving_fifo_head = 0;
+        fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr =
+            ( DMA_FifoHW_t *) _BGP_VA_rDMA_START(grp, DMA_HEADER_REC_FIFO_ID);
+
+        /* Zero start/head/tail/end so the header fifo stays disabled */
+        fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_start = 0;
+        fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_head  = 0;
+        fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_tail  = 0;
+        fg_ptr->fifos[DMA_HEADER_REC_FIFO_ID].dma_fifo.fifo_hw_ptr->pa_end   = 0;
+    }
+
+    fg_ptr->num_normal_fifos = idx;
+
+    /* Clear any stale threshold-crossed status for this group */
+    fg_ptr->status_ptr->clear_threshold_crossed[0] = fg_ptr->mask;
+    fg_ptr->status_ptr->clear_threshold_crossed[1] = fg_ptr->mask;
+
+    /* read back from the dma to ensure all writes have occurred */
+    _bgp_mbar();
+    x = fg_ptr->status_ptr->threshold_crossed[0];
+    if ( (x & fg_ptr->mask) != 0 ) return -EIO;
+
+    /* reenable interrupts, if necessary */
+    /*                                                                              */
+    /* DCRs 0xD71, 0xD72, 0xD73, and 0xD74 contain bits indicating which            */
+    /* reception fifos will be enabled for interrupt 0, 1, 2, and 3, respectively.  */
+    /* These interrupts correspond to BIC interrupt group 2, IRQs 28, 29, 30, and   */
+    /* 31, respectively.  Thus, if bit i is on in DCR 0xD7z, and rec fifo i's       */
+    /* free space drops below the threshold for that fifo, then IRQ 28 + (z-1)      */
+    /* will fire.                                                                   */
+    /*                                                                              */
+    /* For each reception fifo in this group, turn on bit i in DCR 0xD7z, where     */
+    /* z-1 is the group number.                                                     */
+    /*                                                                              */
+
+    used_fifos = 0;
+    for (i = 0; i < fg_ptr->num_normal_fifos; i++)
+        used_fifos |= _BN(fg_ptr->fifos[i].global_fifo_id);
+
+    TRACE((
+              KERN_INFO "bgpdma: Kernel_RecFifoGetFifoGroup() enabling reception FIFO interrupts\n"));
+    local_irq_save(flags);
+
+    bgcnsd.services->setDmaFifoControls(BGCNS_Enable,
+                                        BGCNS_ReceptionFifoInterrupt,
+                                        fg_ptr->group_id,
+                                        used_fifos,
+                                        NULL);
+
+    local_irq_restore(flags);
+
+    /* Make sure all DMA programming is visible before returning */
+    _bgp_msync();
+    _bgp_isync();
+
+    return 0;
+}
+EXPORT_SYMBOL(Kernel_RecFifoGetFifoGroup);
+
+/*
+ * Kernel_RecFifoInitById - initialize one reception fifo of a group.
+ *
+ * Validates the virtual-address range (32-byte aligned start/end,
+ * 16-byte aligned head, start <= head <= end), temporarily disables the
+ * fifo and the reception fifo interrupts (saving their enable state),
+ * programs the hardware start/head/tail/end registers via FifoInit(),
+ * records the fifo as initialized, clears its threshold-crossed status,
+ * verifies head == tail by reading back, and finally re-enables the
+ * fifo and restores the saved interrupt enables.
+ *
+ * Returns 0 on success, -EINVAL on bad arguments, -EBUSY if this fifo
+ * was already initialized, -EIO if the read-back check fails, or a
+ * FifoInit() error code.
+ */
+int  Kernel_RecFifoInitById( u32 * fg,
+                             int   fifo_id,
+                             void  *va_start,
+                             void  *va_head,
+                             void  *va_end )
+{
+    CHECK_DMA_ACCESS;
+
+    int ret;
+    uint32_t st_word, st_mask;
+    uint32_t x_phead, x_vtail, x_vstart, x_pstart;
+    int i, grp, g_fifo_id;
+    DMA_RecFifoGroup_t * fg_ptr = (DMA_RecFifoGroup_t *)fg;
+    uint32_t  xint[4] = {0,0,0,0};   /* saved interrupt-enable state */
+
+    if ( fg_ptr == NULL ) return -EINVAL;
+    if ( fifo_id < 0 || fifo_id >= DMA_NUM_REC_FIFOS_PER_GROUP ) return -EINVAL;
+    if ( va_start >= va_end || va_start > va_head || va_head > va_end ) return -EINVAL;
+    if ( ((u32)va_start & 0x1F) != 0 ) return -EINVAL;
+    if ( ((u32)va_end   & 0x1F) != 0 ) return -EINVAL;
+    if ( ((u32)va_head  & 0xF ) != 0 ) return -EINVAL;
+
+    /*
+     * Note: The reception fifos are in a disabled state upon return from
+     * DMA_RecFifoSetMap(), so we assume they are disabled at this point,
+     * making it safe to set the start, head, etc.
+     */
+
+    /* NOTE: This assumes the interrupt enables have been previously set as desired, */
+    /* in _bgp_DMA_RecFifoGetFifoGroup, so we simply read those dcrs, disable all fifos, */
+    /* and write them back at the end */
+
+    grp       = fg_ptr->group_id;
+    g_fifo_id = fg_ptr->fifos[fifo_id].global_fifo_id;
+
+    if ( g_fifo_id < DMA_NUM_NORMAL_REC_FIFOS) /* normal fifo */
+    {
+        st_word = 0;                             /* status word for this fifo  */
+        st_mask = _BN(g_fifo_id) & fg_ptr->mask; /* status mask for this fifo  */
+
+        /* see if this fifo has already been initialized */
+        if ((bgpdma_dev.state.rec_fifo_init[st_word] & _BN(g_fifo_id)) !=0 ) return -EBUSY;
+
+        /* Disable the FIFO and all interrupts (interrupts will be restored below) */
+        TRACE((
+                  KERN_INFO "bgpdma: Kernel_RecFifoInitById() disabling reception FIFO interrupts\n"));
+        unsigned long flags;
+        local_irq_save(flags);
+        bgcnsd.services->setDmaFifoControls( BGCNS_Disable, BGCNS_ReceptionFifo, 0 /* group not used */, _BN(g_fifo_id), NULL );
+
+        for (i=0; i<4; i++)
+            bgcnsd.services->setDmaFifoControls( BGCNS_Disable, BGCNS_ReceptionFifoInterrupt, i, 0xFFFFFFFF, &(xint[i]) ); /* save for re-enablement below */
+        local_irq_restore(flags);
+    }
+    else /* header fifo */
+    {
+        st_word = 1;             /* status word for this fifo                          */
+        st_mask = fg_ptr->mask;  /* status mask (only one bit is used by the HW)       */
+
+        /* see if this fifo has already been initialized */
+        if ( (bgpdma_dev.state.rec_fifo_init[st_word] & _BN(g_fifo_id-32)) != 0 )
+            return -EBUSY;
+
+        /*
+         * Do NOT mark the fifo initialized here.  The old code set the
+         * rec_fifo_init bit before FifoInit(); if FifoInit() failed,
+         * every retry then returned -EBUSY forever.  The bit is set
+         * after the successful init below, same as for normal fifos.
+         */
+
+        /* Disable the reception header FIFO and its interrupts */
+        TRACE((
+                  KERN_INFO "bgpdma: Kernel_RecFifoInitById() disabling reception header FIFO interrupts\n"));
+        unsigned long flags;
+        local_irq_save(flags);
+        bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifo,  grp,  0 /* mask not used */, NULL );
+        bgcnsd.services->setDmaFifoControls(BGCNS_Disable, BGCNS_ReceptionHeaderFifoInterrupt, 0, 0xFFFFFFFF, xint );
+        local_irq_restore(flags);
+
+    }
+
+    /* Initialize the fifo (programs start/head/tail/end) */
+    ret = FifoInit( &fg_ptr->fifos[fifo_id].dma_fifo, va_start, va_head, va_end );
+    CHECK_RET(ret);
+
+    /* remember that this fifo has been initialized */
+    if ( g_fifo_id < DMA_NUM_NORMAL_REC_FIFOS ) /* normal fifo */
+        bgpdma_dev.state.rec_fifo_init[0] |= _BN(g_fifo_id);
+    else /* header fifo */
+        bgpdma_dev.state.rec_fifo_init[1] |= _BN(g_fifo_id-32);
+
+    /* clear the threshold crossed */
+    fg_ptr->status_ptr->clear_threshold_crossed[st_word] = st_mask;
+
+    /* read back something from the dma to ensure all writes have occurred */
+    /* head should equal tail */
+    x_phead  = fg_ptr->fifos[fifo_id].dma_fifo.fifo_hw_ptr->pa_head;
+    x_vstart = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.va_start;
+    x_pstart = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.pa_start;
+    x_vtail  = (uint32_t)fg_ptr->fifos[fifo_id].dma_fifo.va_tail;
+    if ( x_vstart + ( (x_phead - x_pstart) << 4 ) != x_vtail ) return -EIO;
+
+    /* Enable the FIFO and re-enable interrupts */
+    unsigned long flags;
+    local_irq_save(flags);
+
+    if ( g_fifo_id < DMA_NUM_NORMAL_REC_FIFOS) { /* Normal fifo */
+        TRACE((
+                  KERN_INFO "bgpdma: Kernel_RecFifoInitById() enabling reception FIFO interrupts\n"));
+        bgcnsd.services->setDmaFifoControls(BGCNS_Enable, BGCNS_ReceptionFifo, 0 /* group not used */, _BN(g_fifo_id), NULL);
+
+        for (i=0; i<4; i++)
+            bgcnsd.services->setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionFifoInterrupt, i, 0 /* mask not used */, &(xint[i]) ); /* Restore saved state */
+    }
+    else { /* Header FIFO */
+        TRACE((
+                  KERN_INFO "bgpdma: Kernel_RecFifoInitById() enabling reception header FIFO interrupts\n"));
+        bgcnsd.services->setDmaFifoControls(BGCNS_Enable, BGCNS_ReceptionHeaderFifo, grp, 0 /* mask not used */, NULL );
+        /* bgcnsd.services->setDmaFifoControls(BGCNS_Reenable, BGCNS_ReceptionHeaderFifoInterrupt, 0, 0, xint ); */
+    }
+
+    local_irq_restore(flags);
+
+    return 0;
+}
+EXPORT_SYMBOL(Kernel_RecFifoInitById);
+
+/*
+ * Kernel_SetCommThreadConfig - register a comm-thread interrupt handler.
+ *
+ * Finds the first free irqInfo slot -- or the slot already holding this
+ * irq -- records the handler and its first argument, and on first
+ * registration requests the IRQ, routing it through dmaIrqHandler.
+ * Re-registering an irq only refreshes func/arg1; request_irq() is not
+ * called again.  opcode and cntrid are ignored by this implementation,
+ * and arg2..arg4 must be zero (enforced by CHECK_PARAM).
+ *
+ * Returns 0 on success, -ENOSPC when all MAX_NUM_IRQ slots are taken,
+ * or the request_irq() error code.
+ */
+int Kernel_SetCommThreadConfig(int irq,
+                               int opcode,
+                               LockBox_Counter_t cntrid,
+                               Kernel_CommThreadHandler handler,
+                               uint32_t arg1,
+                               uint32_t arg2,
+                               uint32_t arg3,
+                               uint32_t arg4)
+{
+    int ret = 0;
+    int i;
+
+    /* Only a single argument is supported; extra args must be zero */
+    CHECK_PARAM( arg2 == 0 && arg3 == 0 && arg4 == 0 );
+
+
+
+    /* First empty slot (irq == 0), or the slot already holding this irq */
+    for ( i = 0; i < MAX_NUM_IRQ; i++ )
+        if ( bgpdma_dev.irqInfo[i].irq == 0 || bgpdma_dev.irqInfo[i].irq == irq )
+            break;
+
+    if ( i == MAX_NUM_IRQ )
+    {
+        printk(KERN_INFO "bgpdma: Kernel_SetCommThreadConfig: No more irq info slot\n" );
+        return -ENOSPC;
+    }
+
+    /* Record (or refresh) the handler and its argument in the slot */
+    bgpdma_dev.irqInfo[i].func = handler;
+    bgpdma_dev.irqInfo[i].arg1 = arg1;
+
+    /* Already registered: func/arg1 were refreshed above; nothing else to do */
+    if ( bgpdma_dev.irqInfo[i].irq == irq )
+    {
+        TRACE((
+                  KERN_INFO "bgpdma: Kernel_SetCommThreadConfig: Re-registering handler "
+                  "for irq:%d func:%08x arg1:%d\n",irq, (int)handler, arg1 ));
+        return 0;
+    }
+
+    /* First registration for this irq: claim the slot and hook it up */
+    bgpdma_dev.irqInfo[i].irq = irq;
+
+/*    bgp_dma_irq = irq ; */
+#if defined(TORNIC_TORUS_AFFINITY)
+    /* Optionally pin the torus IRQ to a dedicated CPU */
+    bic_set_cpu_for_irq(irq,k_TorusAffinityCPU) ;
+    TRACE((
+              KERN_INFO "bgpdma: setting affinity irq=%d affinity=%d\n",irq, k_TorusAffinityCPU ));
+#endif
+
+
+    /* All slots share dmaIrqHandler; the slot itself is the dev_id cookie */
+    ret = request_irq(irq,
+                      dmaIrqHandler,
+                      IRQF_DISABLED,
+                      BGP_DMA_NAME,
+                      &bgpdma_dev.irqInfo[i]);
+
+    TRACE((
+              KERN_INFO "bgpdma: request_irq irq=%d i=%d func=%p arg1=%08x ret=%d\n",irq, i, handler, arg1, ret ));
+    CHECK_RET(ret);
+
+    TRACE((
+              KERN_INFO "bgpdma: Kernel_SetCommThreadConfig() finished\n"));
+    return ret;
+}
+
+EXPORT_SYMBOL(Kernel_SetCommThreadConfig) ;
+
+/*
+ * pthread_poof_np - stub; removing a commthread from the run queue is
+ * not implemented in this port.  Logs the (unexpected) call and
+ * reports success so callers proceed.
+ */
+int pthread_poof_np( void )
+{
+    printk(KERN_INFO "bgpdma: pthread_poof_np() called !!! (bgp_dma.c:%d)\n",
+           __LINE__);
+    return 0;
+}
diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c b/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c
new file mode 100644
index 00000000000000..6f96f188276b46
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/spi/DMA_Descriptors.c
@@ -0,0 +1,1588 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2006,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/*!
+ * \file DMA_Descriptors.c
+ *
+ * \brief Implementations for Functions defined in bgp/arch/include/spi/DMA_Descriptors.h
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+
+#ifndef __LINUX_KERNEL__
+
+#include <bpcore/bgp_types.h>
+
+/*!
+ * \brief For kernel_interface.h so that rts_get_personality gets defined
+ */
+#define SPI_DEPRECATED 1
+#include <spi/kernel_interface.h>
+
+#include <spi/DMA_Descriptors.h>
+#include <spi/DMA_Counter.h>
+#include <spi/DMA_InjFifo.h>
+#include <spi/DMA_RecFifo.h>
+
+#include <spi/DMA_Assert.h>
+
+#ifdef __CNK__
+#include <cnk/PersUtils.h>
+#endif
+
+#else
+
+#include <spi/linux_kernel_spi.h>
+
+#endif /* ! __LINUX_KERNEL__ */
+
+
+/*!
+ * \brief Static Info from Personality
+ *
+ * The following structure defines information from the personality.
+ * They are intended to be static so, once the info is retrieved from
+ * the personality, it does not need to be retrieved again (it is a
+ * system call to retrieve personality info).
+ *
+ * It is assumed that this is initialized to zero when the program is
+ * loaded.
+ *
+ */
+static DMA_PersonalityInfo_t personality_info;
+
+
+/*!
+ * \brief Get Personality Information
+ *
+ * Caches the node coordinates and partition dimensions from the
+ * personality into the static "personality_info" structure.  On CNK
+ * the kernel's personality is read in place; otherwise a local copy is
+ * fetched via rts_get_personality().  The personalityRetrieved flag is
+ * published only after the data is globally visible (msync before and
+ * after the store).
+ *
+ * \post The personality information is retrieved into the structure
+ */
+void DMA_GetPersonalityInfo(void)
+{
+  _BGP_Personality_t *p;
+
+#ifdef __CNK__
+
+  p = _bgp_GetPersonality();
+
+#else
+
+  _BGP_Personality_t local_pers;
+
+  rts_get_personality( &local_pers,
+                       sizeof(local_pers) );
+  p = &local_pers;
+
+#endif
+
+  personality_info.nodeXCoordinate = p->Network_Config.Xcoord;
+  personality_info.nodeYCoordinate = p->Network_Config.Ycoord;
+  personality_info.nodeZCoordinate = p->Network_Config.Zcoord;
+  personality_info.xNodes          = p->Network_Config.Xnodes;
+  personality_info.yNodes          = p->Network_Config.Ynodes;
+  personality_info.zNodes          = p->Network_Config.Znodes;
+
+  _bgp_msync(); /* Ensure the info has been stored before setting the flag */
+  personality_info.personalityRetrieved = 1;
+  _bgp_msync();
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Message
+ *
+ * A torus direct put message is one that is sent to another node and its data
+ * is directly put into memory by the DMA on the destination node...it does
+ * not go into a reception fifo.
+ *
+ * A torus direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = DMA_CSUM_SKIP.
+ * - Sk = DMA_CSUM_BIT.
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a direct-put packet).
+ * - Dynamic = Set based on caller's "vc".
+ * - VC = Set to caller's "vc".
+ * - X,Y,Z = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = Destination message offset (from the reception
+ * counter's base address). Set to caller's recv_offset.
+ * - rDMA_Counter = Reception counter ID. This counter is located on the
+ * destination node and contains the base address of the
+ * message and the message length. Set based on caller's
+ * recv_ctr_grp_id and recv_ctr_id.
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] x The destination's x coordinate (8 bits).
+ * \param[in] y The destination's y coordinate (8 bits).
+ * \param[in] z The destination's z coordinate (8 bits).
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Both of x+ and x- cannot be set at the same
+ * time...same with y and z.
+ * \param[in] vc The virtual channel that the packet must go
+ * into if it fails to win the bypass
+ * arbitration in the receiving node.
+ * - 0 = Virtual channel dynamic 0
+ * - 1 = Virtual channel dynamic 1
+ * - 2 = Virtual channel deterministic bubble
+ * - 3 = Virtual channel deterministic priority
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_ctr_grp_id Reception counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] recv_ctr_id Reception counter ID (within the recv counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] recv_offset Offset of the payload from the pa_base
+ * associated with the specified reception
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ * checksum. In the first byte of the torus hardware packet header,
+ * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ * The defaults can be changed by changing DMA_CSUM_SKIP and
+ * DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_ctr_grp_id:
+ * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+*/
+int  DMA_TorusDirectPutDescriptor(
+                          DMA_InjDescriptor_t *desc,
+                          unsigned int         x,
+                          unsigned int         y,
+                          unsigned int         z,
+                          unsigned int         hints,
+                          unsigned int         vc,
+                          unsigned int         inj_ctr_grp_id,
+                          unsigned int         inj_ctr_id,
+                          unsigned int         send_offset,
+                          unsigned int         recv_ctr_grp_id,
+                          unsigned int         recv_ctr_id,
+                          unsigned int         recv_offset,
+                          unsigned int         msg_len
+                         )
+{
+  int chunks;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( (hints & 0x0000003F) == hints );
+  SPI_assert( vc <= 3 );
+  SPI_assert( inj_ctr_grp_id  < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id      < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( recv_ctr_id     < DMA_NUM_COUNTERS_PER_GROUP );
+
+#ifndef NDEBUG
+
+  /* Coordinate range checks need the personality; fetch it once */
+  if ( personality_info.personalityRetrieved == 0 )
+  {
+    DMA_GetPersonalityInfo();
+  }
+
+  SPI_assert( x < personality_info.xNodes );
+  SPI_assert( y < personality_info.yNodes );
+  SPI_assert( z < personality_info.zNodes );
+
+#endif
+
+  DMA_ZeroOutDescriptor(desc);
+
+  /* Control information */
+  desc->idma_counterId =
+    inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP) + inj_ctr_id;  /* 8 bits */
+  desc->base_offset = send_offset;
+  desc->msg_length  = msg_len;
+
+  /* Torus hardware header */
+
+  desc->hwHdr.CSum_Skip = DMA_CSUM_SKIP;  /* Checksum all but header       */
+  desc->hwHdr.Sk        = DMA_CSUM_BIT;   /* Checksum entire packet        */
+  desc->hwHdr.Hint      = hints;          /* Hint Bits from caller         */
+
+  DMA_SetDescriptorPids( desc,
+                         recv_ctr_grp_id ); /* Pids based on recv group id */
+
+  chunks = DMA_PacketChunks(msg_len);     /* 32B chunks in first packet    */
+  SPI_assert( chunks != 0 );
+  desc->hwHdr.Chunks = chunks - 1;        /* Header encodes chunks-1       */
+
+  desc->hwHdr.Dm = 1;                     /* 1=DMA Mode, 0=Fifo Mode       */
+
+  DMA_SetVc( desc,
+             vc );                        /* Virtual channel & Dynamic     */
+
+  desc->hwHdr.X = x;                      /* Destination coordinates       */
+  desc->hwHdr.Y = y;
+  desc->hwHdr.Z = z;
+
+  /* Software header; Payload_Bytes is filled in by the iDMA */
+  desc->hwHdr.Put_Offset   = recv_offset;
+  desc->hwHdr.rDMA_Counter =
+    recv_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP) + recv_ctr_id;
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Direct Put Message
+ *
+ * A local direct put message is one that is targeted within the same node, and
+ * its data is directly put into memory by the DMA...it does not go into a
+ * reception fifo. This is essentially a memcpy via DMA.
+ *
+ * A local direct-put DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ * - Hint = 0 (not used).
+ * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id".
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a direct-put packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = Destination message offset (from the reception
+ * counter's base address). Set to caller's recv_offset.
+ * - rDMA_Counter = Reception counter ID. This counter is located on the
+ * destination node and contains the base address of the
+ * message and the message length.. Set based on caller's
+ * recv_ctr_grp_id and recv_ctr_id.
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_ctr_grp_id Reception counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] recv_ctr_id Reception counter ID (within the recv counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] recv_offset Offset of the payload from the pa_base
+ * associated with the specified reception
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_ctr_grp_id:
+ * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ * The only use for the pid bits is for debug, ie, if headers are
+ * being saved.
+ */
+int  DMA_LocalDirectPutDescriptor(
+                          DMA_InjDescriptor_t *desc,
+                          unsigned int         inj_ctr_grp_id,
+                          unsigned int         inj_ctr_id,
+                          unsigned int         send_offset,
+                          unsigned int         recv_ctr_grp_id,
+                          unsigned int         recv_ctr_id,
+                          unsigned int         recv_offset,
+                          unsigned int         msg_len
+                         )
+{
+  int chunks;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id  < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id      < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( recv_ctr_id     < DMA_NUM_COUNTERS_PER_GROUP );
+
+  DMA_ZeroOutDescriptor(desc);
+
+  /* Control information: an on-node memcopy via the DMA */
+  desc->local_memcopy = 1;                                     /* 1 bit  */
+  desc->idma_counterId =
+    inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP) + inj_ctr_id;  /* 8 bits */
+  desc->base_offset = send_offset;
+  desc->msg_length  = msg_len;
+
+  /* Torus hardware header (routing fields unused for local copies) */
+
+  DMA_SetDescriptorPids( desc,
+                         recv_ctr_grp_id );
+
+  chunks = DMA_PacketChunks(msg_len);   /* 32B chunks in first packet */
+  SPI_assert( chunks != 0 );
+  desc->hwHdr.Chunks = chunks - 1;      /* Header encodes chunks-1    */
+
+  desc->hwHdr.Dm = 1;                   /* 1=DMA Mode, 0=Fifo Mode    */
+
+  /* Software header; Payload_Bytes is filled in by the iDMA */
+  desc->hwHdr.Put_Offset   = recv_offset;
+  desc->hwHdr.rDMA_Counter =
+    recv_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP) + recv_ctr_id;
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local L3 Prefetch Only Message
+ *
+ * A local prefetch is one in which the DMA simply prefetches the send buffer
+ * into L3.
+ *
+ * A local prefetch DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 1
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the message being
+ * prefetched. This counter contains the base address of
+ * the message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ * - Hint = 0 (not used).
+ * - Pid0, Pid1 = 0 (not used).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a DMA packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = 0 (not used).
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len < 208, there will be 7 chunks in the packet, etc.
+ *
+ */
+int  DMA_LocalPrefetchOnlyDescriptor(
+                          DMA_InjDescriptor_t *desc,
+                          unsigned int         inj_ctr_grp_id,
+                          unsigned int         inj_ctr_id,
+                          unsigned int         send_offset,
+                          unsigned int         msg_len
+                         )
+{
+  int chunks;
+
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id     < DMA_NUM_COUNTERS_PER_GROUP );
+
+  DMA_ZeroOutDescriptor(desc);
+
+  /* Control information: prefetch the send buffer into L3 only */
+  desc->local_memcopy = 1;                                     /* 1 bit  */
+  desc->prefetch_only = 1;                                     /* 1 bit  */
+  desc->idma_counterId =
+    inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP) + inj_ctr_id;  /* 8 bits */
+  desc->base_offset = send_offset;
+  desc->msg_length  = msg_len;
+
+  /* Torus hardware header (routing fields unused for a prefetch) */
+  chunks = DMA_PacketChunks(msg_len);   /* 32B chunks in first packet */
+  SPI_assert( chunks != 0 );
+  desc->hwHdr.Chunks = chunks - 1;      /* Header encodes chunks-1    */
+
+  desc->hwHdr.Dm = 1;                   /* 1=DMA Mode, 0=Fifo Mode    */
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Remote-Get Message
+ *
+ * A torus remote-get message is one that is sent to another node and its data
+ * is directly put by the DMA into an injection fifo on the destination
+ * node...it does not go into a reception fifo. Therefore, the payload of this
+ * message is one (or more) descriptors for another message that is to be sent
+ * back to the originating node.
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor. Thus, Chunks = (2)-1 (64 byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * It is also assumed that the payload is NOT checksummed, since it is not
+ * always reproducible. Things like idma_counterId and base_offset may be
+ * different on another run, making checksumming inconsistent.
+ *
+ * A torus remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = 32.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used because Sk is 1).
+ * - Sk = 1 (do not checksum this packet).
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_inj_fifo_id" (see note).
+ * - Chunks = Set to (2)-1 = 1.
+ * - Dm = 1 (Indicates a DMA packet).
+ * - Dynamic = Set based on caller's "vc".
+ * - VC = Set to caller's "vc".
+ * - X,Y,Z = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 1.
+ * - iDMA_Fifo_ID = Injection fifo ID where the payload will be injected.
+ * Set based on caller's recv_inj_ctr_grp_id and
+ * recv_inj_ctr_id.
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] x The destination's x coordinate (8 bits).
+ * \param[in] y The destination's y coordinate (8 bits).
+ * \param[in] z The destination's z coordinate (8 bits).
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Both of x+ and x- cannot be set at the same
+ * time...same with y and z.
+ * \param[in] vc The virtual channel that the packet must go
+ * into if it fails to win the bypass
+ * arbitration in the receiving node.
+ * - 0 = Virtual channel dynamic 0
+ * - 1 = Virtual channel dynamic 1
+ * - 2 = Virtual channel deterministic bubble
+ * - 3 = Virtual channel deterministic priority
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_inj_fifo_grp_id Injection fifo group ID where payload
+ * will be injected on destination node
+ * (0 to DMA_NUM_INJ_FIFO_GROUPS-1).
+ * \param[in] recv_inj_fifo_id Injection fifo ID (within the
+ * recv_inj_fifo_grp_id group)
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_inj_fifo_grp_id:
+ * - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+ */
+int DMA_TorusRemoteGetDescriptor(
+                                 DMA_InjDescriptor_t *desc,
+                                 unsigned int         x,
+                                 unsigned int         y,
+                                 unsigned int         z,
+                                 unsigned int         hints,
+                                 unsigned int         vc,
+                                 unsigned int         inj_ctr_grp_id,
+                                 unsigned int         inj_ctr_id,
+                                 unsigned int         send_offset,
+                                 unsigned int         recv_inj_fifo_grp_id,
+                                 unsigned int         recv_inj_fifo_id
+                                )
+{
+
+  /* Validate caller-supplied arguments (active in debug builds). */
+  SPI_assert( desc != NULL );
+  SPI_assert( (hints & 0x0000003F) == hints );
+  SPI_assert( vc <= 3 );
+  SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_inj_fifo_grp_id < DMA_NUM_INJ_FIFO_GROUPS );
+  SPI_assert( recv_inj_fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+
+#ifndef NDEBUG
+
+  /* Destination coordinates can only be range-checked once the node's
+   * personality (partition dimensions) has been retrieved; fetch it lazily. */
+  if ( personality_info.personalityRetrieved == 0 )
+    {
+      DMA_GetPersonalityInfo();
+    }
+
+  SPI_assert( x < personality_info.xNodes );
+  SPI_assert( y < personality_info.yNodes );
+  SPI_assert( z < personality_info.zNodes );
+
+#endif
+
+  /* Start from an all-zero descriptor; prefetch_only and local_memcopy
+   * remain 0, as documented above for a torus remote-get. */
+  DMA_ZeroOutDescriptor(desc);
+
+  /* Flatten (group, id) into the single 8-bit hardware counter ID. */
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */
+
+  desc->base_offset = send_offset;
+  desc->msg_length  = 32;   /* Default: payload is one 32-byte descriptor */
+
+  /* Torus Headers */
+
+  desc->hwHdr.Sk     = 1;       /* Don't checksum this packet */
+
+  desc->hwHdr.Hint   = hints;   /* Hint Bits from caller */
+
+  DMA_SetDescriptorPids( desc,
+                         recv_inj_fifo_grp_id ); /* Pids based on recv fifo */
+                                                 /* group id                */
+
+  desc->hwHdr.Chunks = 1;       /* Size in Chunks of 32B 1 => 64 bytes */
+  desc->hwHdr.Dm     = 1;       /* 1=DMA Mode, 0=Fifo Mode */
+
+  DMA_SetVc(desc,vc);           /* Set virtual channel and dynamic */
+
+  desc->hwHdr.X = x;            /* Destination coordinates */
+  desc->hwHdr.Y = y;
+  desc->hwHdr.Z = z;
+
+  desc->hwHdr.Flags        = 0x1; /* Flags[7]=Remote-Get */
+  desc->hwHdr.iDMA_Fifo_ID =      /* Destination inj fifo ID */
+    recv_inj_fifo_id + ( recv_inj_fifo_grp_id * DMA_NUM_INJ_FIFOS_PER_GROUP );
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Remote-Get Message
+ *
+ * A local remote-get message is one whose data is directly put by the DMA into
+ * an injection fifo on the local node...it does not go into a reception fifo.
+ * Therefore, the payload of this message is one (or more) descriptors for
+ * another message that is to be injected on the local node.
+ *
+ * By default, we assume that the payload of this remote get packet is a single
+ * descriptor. Thus, Chunks = (2)-1 (64 byte packet) and msg_length = 32.
+ * For remote gets whose payload is greater than 1 descriptor, the caller can
+ * change the packet Chunks and msg_length after this function builds the
+ * default descriptor.
+ *
+ * A local remote-get DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = 32.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ *     - Hint            = 0 (not used).
+ * - Pid0, Pid1 = Set based on caller's "recv_inj_fifo_id" (see note).
+ * - Chunks = Set to (2)-1 = 1.
+ * - Dm = 1 (Indicates a DMA packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 1.
+ * - iDMA_Fifo_ID = Injection fifo ID where the payload will be injected.
+ *                         Set based on caller's recv_inj_fifo_grp_id and
+ *                         recv_inj_fifo_id.
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_inj_fifo_grp_id Injection fifo group ID where payload
+ * will be injected on local node
+ * (0 to DMA_NUM_INJ_FIFO_GROUPS-1).
+ * \param[in] recv_inj_fifo_id Injection fifo ID (within the
+ * recv_inj_fifo_grp_id group)
+ * (0 to DMA_NUM_INJ_FIFOS_PER_GROUP-1).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, for remote-get DMA messages, the pid0 and pid1 bits in the
+ * hardware packet header are determined by the recv_inj_fifo_grp_id:
+ * - if recv_inj_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_inj_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_inj_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_inj_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ *
+ */
+int DMA_LocalRemoteGetDescriptor(
+ DMA_InjDescriptor_t *desc,
+ unsigned int inj_ctr_grp_id,
+ unsigned int inj_ctr_id,
+ unsigned int send_offset,
+ unsigned int recv_inj_fifo_grp_id,
+ unsigned int recv_inj_fifo_id
+ )
+{
+
+ SPI_assert( desc != NULL );
+ SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+ SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP );
+ SPI_assert( recv_inj_fifo_grp_id < DMA_NUM_INJ_FIFO_GROUPS );
+ SPI_assert( recv_inj_fifo_id < DMA_NUM_INJ_FIFOS_PER_GROUP );
+
+ DMA_ZeroOutDescriptor(desc);
+
+ desc->local_memcopy = 1; /* 1 bit */
+
+ desc->idma_counterId =
+ inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */
+ desc->base_offset = send_offset;
+ desc->msg_length = 32;
+
+ /* Torus Headers */
+ DMA_SetDescriptorPids( desc,
+ recv_inj_fifo_grp_id ); /* Pids based on recv fifo */
+ /* group id */
+
+ desc->hwHdr.Chunks = 1; /* Size in Chunks of 32B 1 => 64 bytes */
+ desc->hwHdr.Dm = 1; /* 1=DMA Mode, 0=Fifo Mode */
+
+ desc->hwHdr.Flags = 0x1; /* Flags[7]=Remote-Get */
+ desc->hwHdr.iDMA_Fifo_ID = /* Destination inj fifo ID */
+ recv_inj_fifo_id + ( recv_inj_fifo_grp_id * DMA_NUM_INJ_FIFOS_PER_GROUP );
+
+ return 0;
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Memory Fifo Message
+ *
+ * A torus memory fifo message is one that is sent to another node and its data
+ * is put into a reception memory fifo by the DMA on the destination node.
+ *
+ * A torus memory fifo DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = DMA_CSUM_SKIP.
+ * - Sk = DMA_CSUM_BIT.
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 0 (Indicates a memory fifo packet).
+ * - Dynamic = Set based on caller's "vc".
+ * - VC = Set to caller's "vc".
+ * - X,Y,Z = Set to caller's "x", "y", "z".
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (initialized to 0, and unchanged in the first packet.
+ * Increased by 240 in each subsequent packet, reflecting
+ * the number of bytes transferred in all previous
+ * packets).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = 0 (not used).
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg.
+ * - Func_Id = The registration ID of a function to receive control
+ * on the destination node to process the packet.
+ * Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] x The destination's x coordinate (8 bits).
+ * \param[in] y The destination's y coordinate (8 bits).
+ * \param[in] z The destination's z coordinate (8 bits).
+ * \param[in] recv_fifo_grp_id Reception fifo group ID
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Both of x+ and x- cannot be set at the same
+ * time...same with y and z.
+ * \param[in] vc The virtual channel that the packet must go
+ * into if it fails to win the bypass
+ * arbitration in the receiving node.
+ * - 0 = Virtual channel dynamic 0
+ * - 1 = Virtual channel dynamic 1
+ * - 2 = Virtual channel deterministic bubble
+ * - 3 = Virtual channel deterministic priority
+ * \param[in] sw_arg User-defined 24 bits to be placed into the
+ * packets (bits 8-31).
+ * \param[in] function_id Function id (8 bit registration ID) of the
+ * function to receive control on the
+ * destination node to process packets for this
+ * message.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ * checksum. In the first byte of the torus hardware packet header,
+ * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ * The defaults can be changed by changing DMA_CSUM_SKIP and
+ * DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ *         fewer chunks (a full 8-chunk packet carries up to 240 bytes of
+ *         payload plus the 16-byte header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_fifo_grp_id:
+ * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+*/
+int DMA_TorusMemFifoDescriptor(
+                               DMA_InjDescriptor_t *desc,
+                               unsigned int         x,
+                               unsigned int         y,
+                               unsigned int         z,
+                               unsigned int         recv_fifo_grp_id,
+                               unsigned int         hints,
+                               unsigned int         vc,
+                               unsigned int         sw_arg,
+                               unsigned int         function_id,
+                               unsigned int         inj_ctr_grp_id,
+                               unsigned int         inj_ctr_id,
+                               unsigned int         send_offset,
+                               unsigned int         msg_len
+                               )
+{
+  int c;
+
+  /* Validate caller-supplied arguments (active in debug builds). */
+  SPI_assert( desc != NULL );
+  SPI_assert( (hints & 0x0000003F) == hints );
+  SPI_assert( vc <= 3 );
+  SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_fifo_grp_id < DMA_NUM_REC_FIFO_GROUPS );
+
+#ifndef NDEBUG
+
+  /* Destination coordinates can only be range-checked once the node's
+   * personality (partition dimensions) has been retrieved; fetch it lazily. */
+  if ( personality_info.personalityRetrieved == 0 )
+    {
+      DMA_GetPersonalityInfo();
+    }
+
+  SPI_assert( x < personality_info.xNodes );
+  SPI_assert( y < personality_info.yNodes );
+  SPI_assert( z < personality_info.zNodes );
+
+#endif
+
+  /* Start from an all-zero descriptor; Dm remains 0, which marks this as a
+   * memory fifo (not direct-put) packet per the contract above. */
+  DMA_ZeroOutDescriptor(desc);
+
+  /* Flatten (group, id) into the single 8-bit hardware counter ID. */
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */
+
+  desc->base_offset = send_offset;
+  desc->msg_length  = msg_len;
+
+  /* Torus Headers */
+
+  desc->hwHdr.CSum_Skip = DMA_CSUM_SKIP; /* Checksum all but header */
+  desc->hwHdr.Sk        = DMA_CSUM_BIT;  /* Checksum entire packet  */
+  desc->hwHdr.Hint      = hints;         /* Hint Bits from caller   */
+
+  DMA_SetDescriptorPids( desc,
+                         recv_fifo_grp_id ); /* Pids based on recv group id */
+
+  c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first */
+                                 /* packet.                                 */
+  SPI_assert( c!=0 );
+  desc->hwHdr.Chunks = c - 1;    /* Packet header has 0 for 1 chunk, ... ,  */
+                                 /* 7 for 8 chunks).                        */
+
+  DMA_SetVc( desc,
+             vc );               /* Virtual channel & Dynamic. */
+
+  desc->hwHdr.X = x;             /* Destination coordinates */
+  desc->hwHdr.Y = y;
+  desc->hwHdr.Z = z;
+
+  desc->hwHdr.SW_Arg  = sw_arg;       /* User-defined    */
+  desc->hwHdr.Func_Id = function_id;  /* Registration id */
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Local Memory Fifo Message
+ *
+ * A local memory fifo message is one whose data is put into a reception
+ * memory fifo on the same node by the DMA.
+ *
+ * A local memory fifo DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 1
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = 0 (not used).
+ * - Sk = 0 (not used).
+ * - Hint = 0 (not used).
+ * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 0 (Indicates a memory fifo packet).
+ * - Dynamic = 0 (not used).
+ * - VC = 0 (not used).
+ * - X,Y,Z = 0 (not used).
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = 0 (not used).
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg.
+ * - Func_Id = The registration ID of a function to receive control
+ * on this local node to process the packet.
+ * Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] recv_fifo_grp_id Reception fifo group ID
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] sw_arg User-defined 24 bits to be placed into the
+ * packets (bits 8-31).
+ * \param[in] function_id Function id (8 bit registration ID) of the
+ * function to receive control on this
+ * local node to process packets for this
+ * message.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ *         fewer chunks (a full 8-chunk packet carries up to 240 bytes of
+ *         payload plus the 16-byte header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_fifo_grp_id:
+ * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+*/
+int DMA_LocalMemFifoDescriptor(
+                               DMA_InjDescriptor_t *desc,
+                               unsigned int         recv_fifo_grp_id,
+                               unsigned int         sw_arg,
+                               unsigned int         function_id,
+                               unsigned int         inj_ctr_grp_id,
+                               unsigned int         inj_ctr_id,
+                               unsigned int         send_offset,
+                               unsigned int         msg_len
+                              )
+{
+  int c;
+
+  /* Validate caller-supplied arguments (active in debug builds). */
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_fifo_grp_id < DMA_NUM_REC_FIFO_GROUPS );
+
+  /* Start from an all-zero descriptor; Dm remains 0, which marks this as a
+   * memory fifo (not direct-put) packet per the contract above. */
+  DMA_ZeroOutDescriptor(desc);
+
+  desc->local_memcopy = 1;  /* 1 bit: data stays on this node */
+
+  /* Flatten (group, id) into the single 8-bit hardware counter ID. */
+  desc->idma_counterId =
+    inj_ctr_id + inj_ctr_grp_id*(DMA_NUM_COUNTERS_PER_GROUP); /* 8 bits */
+
+  desc->base_offset = send_offset;
+  desc->msg_length  = msg_len;
+
+  /* Torus Headers */
+  DMA_SetDescriptorPids( desc,
+                         recv_fifo_grp_id ); /* Pids based on recv group id */
+
+  c = DMA_PacketChunks(msg_len); /* Calculate number of 32B chunks in first */
+                                 /* packet.                                 */
+  SPI_assert( c!=0 );
+  desc->hwHdr.Chunks = c - 1;    /* Packet header has 0 for 1 chunk, ... ,  */
+                                 /* 7 for 8 chunks).                        */
+
+  desc->hwHdr.SW_Arg  = sw_arg;       /* User-defined    */
+  desc->hwHdr.Func_Id = function_id;  /* Registration id */
+
+#ifdef DEBUG_MSG
+  Dump_InjDescriptor(desc);
+#endif
+
+  return 0;
+}
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Direct Put Broadcast Message
+ *
+ * A torus direct put broadcast message is one that is sent to all of the nodes
+ * in a specified direction along a specified line, its data
+ * is directly put into memory on the nodes along that line by the DMA on those
+ * nodes...it does not go into a reception fifo. Only one hint bit can be
+ * specified, dictating the direction (plus or minus) and line (x, y, or z).
+ *
+ * By default, the packet is included in the checksum. Retransmitted packets
+ * should not be included in the checksum.
+ *
+ * By default, the deterministic bubble normal virtual channel is used.
+ *
+ * A torus direct-put broadcast DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = DMA_CSUM_SKIP.
+ * - Sk = DMA_CSUM_BIT.
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_ctr_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 1 (Indicates a direct-put packet).
+ * - Dynamic = 0 (Deterministic).
+ * - VC = Virtual Channel: Deterministic Bubble Normal.
+ * - X,Y,Z = Set according to the hints:
+ * Two of the directions are set to this node's
+ * coordinates (no movement in those directions).
+ * One direction is set to the dest specified
+ * by the caller.
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = Destination message offset (from the reception
+ * counter's base address). Set to caller's recv_offset.
+ * - rDMA_Counter = Reception counter ID. This counter is located on the
+ * destination node and contains the base address of the
+ * message and the message length. Set based on caller's
+ * recv_ctr_grp_id and recv_ctr_id.
+ * - Payload_Bytes = Number of valid bytes in the payload. Set by iDMA.
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - Func_Id = 0 (not used).
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] dest The final torus destination coordinate
+ * along the line specified by the hints.
+ * Should not exceed the number of nodes in
+ * the direction of travel.
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Only one bit may be specified.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] recv_ctr_grp_id Reception counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] recv_ctr_id Reception counter ID (within the recv counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] recv_offset Offset of the payload from the pa_base
+ * associated with the specified reception
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ * checksum. In the first byte of the torus hardware packet header,
+ * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ * The defaults can be changed by changing DMA_CSUM_SKIP and
+ * DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ *         fewer chunks (a full 8-chunk packet carries up to 240 bytes of
+ *         payload plus the 16-byte header).
+ *       - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_ctr_grp_id:
+ * - if recv_ctr_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_ctr_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_ctr_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_ctr_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+*/
+int DMA_TorusDirectPutBcastDescriptor(
+                                      DMA_InjDescriptor_t *desc,
+                                      unsigned int         dest,
+                                      unsigned int         hints,
+                                      unsigned int         inj_ctr_grp_id,
+                                      unsigned int         inj_ctr_id,
+                                      unsigned int         send_offset,
+                                      unsigned int         recv_ctr_grp_id,
+                                      unsigned int         recv_ctr_id,
+                                      unsigned int         recv_offset,
+                                      unsigned int         msg_len
+                                     )
+{
+  int rc;                       /* Return code from the helper builder */
+  int dest_x,dest_y,dest_z;
+
+  /* Validate caller-supplied arguments (active in debug builds). */
+  SPI_assert( desc != NULL );
+  SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP );
+  SPI_assert( recv_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+  SPI_assert( recv_ctr_id < DMA_NUM_COUNTERS_PER_GROUP );
+
+  /*
+   * Previous code to retrieve our node's x,y,z coords:
+   * BGLPartitionGetCoords( &dest_x, &dest_y, &dest_z );
+   *
+   * If the node's x,y,z coordinates have not yet been retrieved from the
+   * personality, go get the personality and set the DMA_NodeXCoordinate,
+   * DMA_NodeYCoordinate, and DMA_NodeZCoordinate static variables from
+   * the personality info.  Then, use this to init the dest_x,y,z variables.
+   */
+  if ( personality_info.personalityRetrieved == 0 )
+    {
+      DMA_GetPersonalityInfo();
+    }
+
+  /* The broadcast travels along a single line: start from this node's own
+   * coordinates so the two off-line directions stay fixed. */
+  dest_x = personality_info.nodeXCoordinate;
+  dest_y = personality_info.nodeYCoordinate;
+  dest_z = personality_info.nodeZCoordinate;
+
+  /*
+   * Examine the hint bits specified by the caller:
+   * - Ensure exactly one of them is specified
+   * - Ensure dest is a valid coordinate for the direction of the broadcast.
+   *   Coordinates run 0..nodes-1, so use "<", matching the coordinate
+   *   checks in the other descriptor-building functions (the previous
+   *   "<=" allowed the out-of-range value dest == nodes).
+   * - Override x, y, or z with dest for the specified direction
+   */
+  switch(hints) {
+
+  case DMA_PACKET_HINT_XP:
+  case DMA_PACKET_HINT_XM:
+    SPI_assert( dest < personality_info.xNodes );
+    dest_x = dest;
+    break;
+
+  case DMA_PACKET_HINT_YP:
+  case DMA_PACKET_HINT_YM:
+    SPI_assert( dest < personality_info.yNodes );
+    dest_y = dest;
+    break;
+
+  case DMA_PACKET_HINT_ZP:
+  case DMA_PACKET_HINT_ZM:
+    SPI_assert( dest < personality_info.zNodes );
+    dest_z = dest;
+    break;
+
+  default:                      /* Zero or multiple hint bits: invalid */
+    SPI_assert(0);
+
+  }
+
+  /* Build the descriptor, and propagate any failure from the helper
+   * instead of unconditionally reporting success. */
+  rc = DMA_TorusDirectPutDescriptor(desc,
+                                    dest_x,
+                                    dest_y,
+                                    dest_z,
+                                    hints,
+                                    DMA_PACKET_VC_BN,
+                                    inj_ctr_grp_id,
+                                    inj_ctr_id,
+                                    send_offset,
+                                    recv_ctr_grp_id,
+                                    recv_ctr_id,
+                                    recv_offset,
+                                    msg_len);
+
+  /* Set the deposit bit so the packet is delivered to every node along
+   * the broadcast line (see the function description above). */
+  desc->hwHdr.Dp = 1;
+
+  return rc;
+}
+
+
+
+
+/*!
+ * \brief Create a DMA Descriptor For a Torus Memory Fifo Broadcast Message
+ *
+ * A torus memory fifo broadcast message is one that is sent to all of the nodes
+ * in a specified direction along a specified line, its data is
+ * put into a reception memory fifo by the DMA on the destination nodes along
+ * that line. Only one hint bit can be specified, dictating the direction
+ * (plus or minus) and line (x, y, or z).
+ *
+ * By default, the packet is included in the checksum. Retransmitted packets
+ * should not be included in the checksum.
+ *
+ * By default, the deterministic bubble normal virtual channel is used.
+ *
+ * A torus memory fifo broadcast DMA descriptor contains the following:
+ *
+ * - 16 bytes of control information:
+ * - prefetch_only = 0
+ * - local_memcopy = 0
+ * - idma_counterId = Injection counter ID associated with the data being
+ * sent. This counter contains the base address of the
+ * message and the message length. Set based on caller's
+ * inj_ctr_grp_id and inj_ctr_id.
+ * - base_offset = Message offset (from the injection counter's base
+ * address). Set to caller's send_offset.
+ * - msg_length = Message length. Set to caller's msg_len.
+ *
+ * - 8 byte torus hardware header
+ * - CSum_Skip = DMA_CSUM_SKIP.
+ * - Sk = DMA_CSUM_BIT.
+ * - Hint = Set to caller's "hints".
+ * - Pid0, Pid1 = Set based on caller's "recv_fifo_grp_id" (see note).
+ * - Chunks = Set to largest size consistent with msg_len.
+ * - Dm = 0 (Indicates a memory fifo packet).
+ * - Dynamic = 0 (Deterministic).
+ * - VC = Virtual Channel: Deterministic Bubble Normal.
+ * - X,Y,Z = Set according to the hints:
+ * Two of the directions are set to this node's
+ * coordinates (no movement in those directions).
+ * One direction is set to the dest specified
+ * by the caller.
+ *
+ * - 8 byte software header (initial values used by iDMA).
+ * - Put_Offset = 0 (not used).
+ * - rDMA_Counter = 0 (not used).
+ * - Payload_Bytes = 0 (not used).
+ * - Flags = Pacing = 0.
+ * Remote-Get = 0.
+ * - iDMA_Fifo_ID = 0 (not used).
+ * - SW_Arg = User-defined 24 bits. Set to caller's sw_arg.
+ * - Func_Id = The registration ID of a function to receive control
+ * on the destination node to process the packet.
+ * Set to caller's function_id.
+ *
+ * This function creates the above descriptor.
+ *
+ * \param[in,out] desc Pointer to the storage where the descriptor
+ * will be created.
+ * \param[in] dest The final torus destination coordinate
+ * along the line specified by the hints.
+ * Should not exceed the number of nodes in
+ * the direction of travel.
+ * \param[in] recv_fifo_grp_id Reception fifo group ID
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] hints Hint bits for torus routing (6 bits).
+ * Each bit corresponds to x+, x-, y+, y-,
+ * z+, z-. If a bit is set, it indicates that
+ * the packet wants to travel along the
+ * corresponding direction. If all bits are
+ * zero, the hardware calculates the hint bits.
+ * Only one bit may be specified.
+ * \param[in] sw_arg User-defined 24 bits to be placed into the
+ * packets (bits 8-31).
+ * \param[in] function_id Function id (8 bit registration ID) of the
+ * function to receive control on the
+ * destination node to process packets for this
+ * message.
+ * \param[in] inj_ctr_grp_id Injection counter group ID
+ * (0 to DMA_NUM_COUNTER_GROUPS-1).
+ * \param[in] inj_ctr_id Injection counter ID (within the inj counter
+ * group) (0 to DMA_NUM_COUNTERS_PER_GROUP-1).
+ * \param[in] send_offset Offset of the send payload from the pa_base
+ * associated with the specified injection
+ * counter.
+ * \param[in] msg_len Total message length (in bytes).
+ *
+ * \retval 0 Success
+ * \retval non-zero Failure
+ *
+ * \note By default, all payload bytes are included in the torus injection
+ * checksum. In the first byte of the torus hardware packet header,
+ * this corresponds to setting CSum_Skip = 0x8 (16 bytes) and Sk=0.
+ * The defaults can be changed by changing DMA_CSUM_SKIP and
+ * DMA_CSUM_BIT in this include file.
+ *
+ * \note By default, the packet size is set to the largest value consistent
+ * with the message size. For example,
+ * - if msg_len >= 209, there will be 8 32-byte chunks in each packet,
+ * with the possible exception of the last packet, which could contain
+ * fewer chunks (209... of payload + 16 header).
+ * - if 177 <= msg_len <= 208, there will be 7 chunks in the packet, etc.
+ *
+ * \note By default, for direct-put DMA messages, the pid0 and pid1 bits in the
+ * torus hardware packet header are determined by the recv_fifo_grp_id:
+ * - if recv_fifo_grp_id = 0 => (pid0,pid1) = (0,0)
+ * - if recv_fifo_grp_id = 1 => (pid0,pid1) = (0,1)
+ * - if recv_fifo_grp_id = 2 => (pid0,pid1) = (1,0)
+ * - if recv_fifo_grp_id = 3 => (pid0,pid1) = (1,1)
+ * Pid0 determines into which physical torus fifo group on the destination
+ * node the packet is put, prior to the dma receiving it. Other than that,
+ * the only use for the pid bits is for debug, ie, if headers are being
+ * saved.
+*/
+int DMA_TorusMemFifoBcastDescriptor(
+ DMA_InjDescriptor_t *desc,
+ unsigned int dest,
+ unsigned int recv_fifo_grp_id,
+ unsigned int hints,
+ unsigned int sw_arg,
+ unsigned int function_id,
+ unsigned int inj_ctr_grp_id,
+ unsigned int inj_ctr_id,
+ unsigned int send_offset,
+ unsigned int msg_len
+ )
+{
+ int dest_x,dest_y,dest_z;
+
+ /* Validate arguments (ranges documented in the block comment above). */
+ SPI_assert( desc != NULL );
+ SPI_assert( inj_ctr_grp_id < DMA_NUM_COUNTER_GROUPS );
+ SPI_assert( inj_ctr_id < DMA_NUM_COUNTERS_PER_GROUP );
+ /* Bug fix: recv_fifo_grp_id is a reception fifo group ID, documented
+ * above as 0 to DMA_NUM_REC_FIFO_GROUPS-1, so bound it by
+ * DMA_NUM_REC_FIFO_GROUPS rather than DMA_NUM_COUNTER_GROUPS.
+ */
+ SPI_assert( recv_fifo_grp_id < DMA_NUM_REC_FIFO_GROUPS );
+
+ /*
+ * If the node's x,y,z coordinates have not yet been retrieved from the
+ * personality, go get the personality and cache the coordinates in the
+ * personality_info static. Then seed dest_x,y,z with this node's own
+ * coordinates: the broadcast travels along one dimension only, so the
+ * other two coordinates must stay at this node's position.
+ */
+ if ( personality_info.personalityRetrieved == 0 )
+ {
+ DMA_GetPersonalityInfo();
+ }
+
+ dest_x = personality_info.nodeXCoordinate;
+ dest_y = personality_info.nodeYCoordinate;
+ dest_z = personality_info.nodeZCoordinate;
+
+ /*
+ * Examine the hint bits specified by the caller:
+ * - Ensure only one direction is specified
+ * - Ensure dest is valid for the direction of the broadcast
+ * - Override x, y, or z with dest for the specified direction
+ */
+ switch(hints) {
+
+ case DMA_PACKET_HINT_XP:
+ case DMA_PACKET_HINT_XM:
+ dest_x = dest;
+ SPI_assert( dest <= personality_info.xNodes );
+ break;
+
+ case DMA_PACKET_HINT_YP:
+ case DMA_PACKET_HINT_YM:
+ dest_y = dest;
+ SPI_assert( dest <= personality_info.yNodes );
+ break;
+
+ case DMA_PACKET_HINT_ZP:
+ case DMA_PACKET_HINT_ZM:
+ dest_z = dest;
+ SPI_assert( dest <= personality_info.zNodes );
+ break;
+
+ default:
+ /* Zero hint bits, or more than one, is invalid for a broadcast. */
+ SPI_assert(0);
+
+ }
+
+ /* Build the memory-fifo descriptor on the deterministic bubble normal
+ * virtual channel. */
+ DMA_TorusMemFifoDescriptor(
+ desc,
+ dest_x,
+ dest_y,
+ dest_z,
+ recv_fifo_grp_id,
+ hints,
+ DMA_PACKET_VC_BN,
+ sw_arg,
+ function_id,
+ inj_ctr_grp_id,
+ inj_ctr_id,
+ send_offset,
+ msg_len);
+
+ /* Set the deposit bit, which makes this descriptor a broadcast. */
+ desc->hwHdr.Dp =1;
+
+ return 0;
+}
+/* Export the descriptor-building SPI entry points for use by kernel modules. */
+EXPORT_SYMBOL(DMA_GetPersonalityInfo);
+EXPORT_SYMBOL(DMA_TorusDirectPutDescriptor);
+EXPORT_SYMBOL(DMA_LocalDirectPutDescriptor);
+EXPORT_SYMBOL(DMA_LocalPrefetchOnlyDescriptor);
+EXPORT_SYMBOL(DMA_TorusRemoteGetDescriptor);
+EXPORT_SYMBOL(DMA_LocalRemoteGetDescriptor);
+EXPORT_SYMBOL(DMA_TorusMemFifoDescriptor);
+EXPORT_SYMBOL(DMA_LocalMemFifoDescriptor);
+EXPORT_SYMBOL(DMA_TorusDirectPutBcastDescriptor);
+EXPORT_SYMBOL(DMA_TorusMemFifoBcastDescriptor);
diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c b/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c
new file mode 100644
index 00000000000000..34c85501a121b2
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/spi/DMA_InjFifo.c
@@ -0,0 +1,206 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/*! \file DMA_InjFifo.c
+ *
+ * \brief Implementations for Functions Defined in bgp/arch/include/spi/DMA_InjFifo.h.
+ *
+ */
+
+#undef DEBUG_PRINT
+/* #define DEBUG_PRINT 1 */
+
+#ifndef __LINUX_KERNEL__
+
+#include <common/bgp_personality_inlines.h>
+#include <spi/bgp_SPI.h>
+#include <stdio.h>
+#include <errno.h>
+
+#else
+
+#include <spi/linux_kernel_spi.h>
+
+#endif /* ! __LINUX_KERNEL__ */
+
+/*!
+ *
+ * \brief Remote Get Fifo Full Handler Table
+ *
+ * An array of entries, one per injection fifo. Each entry specifies the fifo
+ * group structure and the handler function that will receive control to
+ * handle a remote get fifo full condition for fifos in that fifo group.
+ */
+DMA_InjFifoRgetFifoFullHandlerEntry_t DMA_RgetFifoFullHandlerTable[DMA_NUM_INJ_FIFOS];
+
+
+/*!
+ * \brief Remote Get Fifo Full Init Has Been Done Indicator
+ *
+ * 0 means the initialization has not been done.
+ * 1 means the initialization has been done.
+ *
+ * NOTE(review): nothing in this file sets this flag to 1 -- confirm that an
+ * external caller sets it after DMA_InjFifoRgetFifoFullInit() and handler
+ * registration complete, or the interrupt handler will ignore all interrupts.
+ */
+int DMA_InjFifoRgetFifoFullInitHasBeenDone = 0;
+
+
+/*!
+ * \brief Pointer to Barrier function Used By Remote Get Fifo Full Interrupt Handler
+ */
+static void (*DMA_RgetFifoFullHandlerBarrierFcn)(void *);
+/*!
+ * \brief Generic arg for Barrier function, passed on every barrier call
+ */
+static void *DMA_RgetFifoFullHandlerBarrierArg;
+
+
+/*!
+ * \brief Remote Get Fifo Full Interrupt Handler
+ *
+ * This function receives control when a remote get fifo becomes full.
+ * It attempts to recover from the condition and restart the DMA.
+ * It receives control in all cores (a broadcast interrupt).
+ *
+ * Upon entry, the DMA is assumed to have been stopped, both the iDMA
+ * and the rDMA. This has been done by the kernel's interrupt
+ * handler that invoked this function.
+ *
+ * \param[in] arg1,arg2,arg3,arg4 Unused; required by the comm-thread
+ * handler signature.
+ */
+void DMA_InjFifoRgetFifoFullInterruptHandler(uint32_t arg1,
+ uint32_t arg2,
+ uint32_t arg3,
+ uint32_t arg4)
+{
+ uint32_t global_fnum, freeSpaceInBytes;
+ uint32_t core_num = Kernel_PhysicalProcessorID();
+
+ /* If Init has not been done yet, ignore the interrupt.
+ * NOTE(review): this relies on pthread_poof_np() not returning to its
+ * caller; otherwise execution would fall through into the barrier below
+ * -- confirm pthread_poof_np() terminates the interrupt thread.
+ */
+ if ( DMA_InjFifoRgetFifoFullInitHasBeenDone == 0 )
+ {
+ pthread_poof_np(); /* Return from this interrupt. */
+ }
+
+ /*
+ * Barrier across all cores. This is needed to ensure that
+ * 1. The DMA has been stopped (only the last core to see this interrupt
+ * stops the DMA).
+ * 2. We don't exit from this handler until the core that needs to handle
+ * the rget fifo full condition has cleared the condition causing the
+ * interrupt, or else it will fire right away again.
+ *
+ * This barrier, while allocated by the main core of each process on the
+ * compute node, has been modified during DMA SPI Setup to expect the
+ * appropriate number of cores to participate.
+ */
+
+ DMA_RgetFifoFullHandlerBarrierFcn( DMA_RgetFifoFullHandlerBarrierArg );
+
+ /*
+ * For each injection fifo...
+ * For each entry of the RgetFifoFullHandlerTable that is managed
+ * by our core and has a registered rget fifo full handler,
+ * 1. Determine whether this rget fifo is full (or nearly so)
+ * 2. If full, call the registered handler to handle the condition.
+ */
+ for ( global_fnum=0; global_fnum<DMA_NUM_INJ_FIFOS; global_fnum++)
+ {
+ if ( ( DMA_RgetFifoFullHandlerTable[global_fnum].core_num == core_num ) &&
+ ( DMA_RgetFifoFullHandlerTable[global_fnum].handler ) )
+ {
+ /* The rget fifo is considered full (or nearly so) if there is
+ * only enough freespace in the fifo to hold one descriptor or less.
+ * The free space is returned in 16-byte quads, hence the << 4 to
+ * convert to bytes.
+ */
+ freeSpaceInBytes =
+ DMA_InjFifoGetFreeSpaceById (
+ DMA_RgetFifoFullHandlerTable[global_fnum].fg_ptr,
+ global_fnum & 0x1f, /* relative fifo number within its group of 32 */
+ 1,
+ 1) << 4;
+ if ( freeSpaceInBytes <= (DMA_MIN_INJECT_SIZE_IN_QUADS*16) +
+ DMA_FIFO_DESCRIPTOR_SIZE_IN_BYTES )
+ {
+ /*
+ * Call the handler function to free up space in the fifo,
+ * if possible.
+ */
+
+ (*(DMA_RgetFifoFullHandlerTable[global_fnum].handler))(
+ DMA_RgetFifoFullHandlerTable[global_fnum].fg_ptr,
+ global_fnum & 0x1F,
+ DMA_RgetFifoFullHandlerTable[global_fnum].handler_parm);
+ }
+ }
+ }
+
+ /*
+ * Barrier. Wait here until all cores reach this point in the interrupt
+ * handler.
+ */
+
+ DMA_RgetFifoFullHandlerBarrierFcn( DMA_RgetFifoFullHandlerBarrierArg );
+
+ /*
+ * Exit from the interrupt.
+ */
+ pthread_poof_np();
+}
+
+/*!
+ * \brief Remote Get Fifo Full Initialization
+ *
+ * Initialize data structures and interrupt handlers to handle a remote get
+ * fifo full condition.
+ *
+ * \param[in] rget_interruptGroup Interrupt group whose lockbox counter is
+ * cleared and with which the broadcast
+ * interrupt handler is registered.
+ * \param[in] rget_barrier Barrier function the interrupt handler
+ * uses to synchronize all cores.
+ * \param[in] rget_barrier_arg Opaque argument passed to rget_barrier.
+ *
+ * NOTE(review): this function does not set
+ * DMA_InjFifoRgetFifoFullInitHasBeenDone to 1, yet the interrupt handler
+ * ignores interrupts until that flag is set -- confirm the flag is set
+ * elsewhere once per-fifo handlers have been registered.
+ */
+void DMA_InjFifoRgetFifoFullInit( Kernel_InterruptGroup_t rget_interruptGroup,
+ void (*rget_barrier)(void *) ,
+ void *rget_barrier_arg
+ )
+{
+ int i;
+
+ /*
+ * Clear the handler table.
+ */
+ for ( i=0; i<DMA_NUM_INJ_FIFOS; i++ )
+ {
+ DMA_RgetFifoFullHandlerTable[i].fg_ptr = NULL;
+ DMA_RgetFifoFullHandlerTable[i].handler = NULL;
+ DMA_RgetFifoFullHandlerTable[i].handler_parm = NULL;
+ DMA_RgetFifoFullHandlerTable[i].core_num = 0;
+ }
+
+ /*
+ * Clear the lockbox counter associated with this interrupt.
+ * The lockbox keeps track of which cores have entered and exited
+ * the kernel's interrupt handler.
+ */
+ LockBox_FetchAndClear( rget_interruptGroup );
+
+ DMA_RgetFifoFullHandlerBarrierFcn = rget_barrier;
+ DMA_RgetFifoFullHandlerBarrierArg = rget_barrier_arg;
+
+ /*
+ * Register the interrupt handler to handle the remote get
+ * fifo full condition. The handler is broadcast to all cores
+ * (COMMTHRD_OPCODE_BCAST).
+ */
+ Kernel_SetCommThreadConfig(Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G3_HIER_POS, 24),
+ COMMTHRD_OPCODE_BCAST |
+ COMMTHRD_OPCODE_CALLFUNC,
+ rget_interruptGroup,
+ DMA_InjFifoRgetFifoFullInterruptHandler,
+ 0, 0, 0, 0);
+}
diff --git a/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c b/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c
new file mode 100644
index 00000000000000..c97fd8f13eed75
--- /dev/null
+++ b/arch/powerpc/syslib/bgdd/spi/DMA_RecFifo.c
@@ -0,0 +1,3016 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2006,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ ********************************************************************/
+/*! \file DMA_RecFifo.c
+ *
+ * \brief Implementations for Functions Defined in bgp/arch/include/spi/DMA_RecFifo.h.
+ *
+ */
+#include <linux/version.h>
+#include <linux/module.h>
+#include <asm/bitops.h>
+
+#undef DEBUG_PRINT
+/* #define DEBUG_PRINT 1 */
+
+#ifndef __LINUX_KERNEL__
+
+#include <spi/DMA_RecFifo.h>
+#include <stdio.h>
+#include <bpcore/ppc450_inlines.h>
+#include <bpcore/ic_memmap.h>
+#include <common/bgp_bitnumbers.h>
+#include <errno.h>
+
+#else
+
+#include <spi/linux_kernel_spi.h>
+/* Interrupt encoding for Blue Gene/P hardware.
+ * Given a BIC group and bit index within the group,
+ * bic_hw_to_irq(group, gint) returns the Linux IRQ number.
+ * ( really from asm/bluegene.h but we get mismatches if we include it)
+ */
+
+#endif /* ! __LINUX_KERNEL__ */
+
+#include <linux/dma-mapping.h>
+
+#define TRACE(x) printk x
+
+
+#if defined(BGP_DD1_WORKAROUNDS)
+
+/*!
+ * \brief Number of times the poll functions have been called and returned
+ * no packets processed.
+ *
+ * Special Value: -1 means that the Kernel_ClearFullReceptionFifo() syscall
+ * has been invoked, but no packets have been processed
+ * since. This tells the poll function that even if it
+ * does not process any packets, it should not increment
+ * this counter and ultimately issue the syscall again, because
+ * there is no need.
+ */
+int NumEmptyPollFunctionCalls = -1;
+
+/*!
+ * \brief Limit for NumEmptyPollFunctionCalls
+ */
+const int NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT = 10;
+
+#endif
+
+#if defined(CONFIG_BGP_STATISTICS)
+/* NOTE(review): presumably a 33-bucket histogram and a high-water mark of
+ * reception fifo occupancy -- confirm against the code that updates them. */
+int reception_fifo_histogram[33] ;
+unsigned int reception_hi_watermark ;
+#endif
+/* Read word 0 (PAGEID) of the TLB entry at 'tlbindex' via the PPC440
+ * tlbre instruction. */
+static inline int get_tlb_pageid(int tlbindex)
+ {
+ int rc ;
+ /* PPC44x_TLB_PAGEID is 0 (the tlbre word selector) */
+ asm volatile( "tlbre %[rc],%[index],0"
+ : [rc] "=r" (rc)
+ : [index] "r" (tlbindex)
+ ) ;
+ return rc ;
+ }
+
+/* Read word 1 (XLAT, the translation word) of the TLB entry at 'tlbindex'
+ * via the PPC440 tlbre instruction. */
+static inline int get_tlb_xlat(int tlbindex)
+ {
+ int rc ;
+ /* PPC44x_TLB_XLAT is 1 (the tlbre word selector) */
+ asm volatile( "tlbre %[rc],%[index],1"
+ : [rc] "=r" (rc)
+ : [index] "r" (tlbindex)
+ ) ;
+ return rc ;
+ }
+
+/* Read word 2 (ATTRIB, the attributes word) of the TLB entry at 'tlbindex'
+ * via the PPC440 tlbre instruction. */
+static inline int get_tlb_attrib(int tlbindex)
+ {
+ int rc ;
+ /* PPC44x_TLB_ATTRIB is 2 (the tlbre word selector) */
+ asm volatile( "tlbre %[rc],%[index],2"
+ : [rc] "=r" (rc)
+ : [index] "r" (tlbindex)
+ ) ;
+ return rc ;
+ }
+
+/* Search the TLB for the entry that translates 'vaddr' via the PPC440
+ * tlbsx instruction; returns the matching TLB index.
+ * NOTE(review): without the record ('.') form, the result is presumably
+ * undefined when no entry matches -- callers should only trust the index
+ * for addresses known to be mapped.
+ */
+static inline int search_tlb(unsigned int vaddr)
+ {
+ int rc ;
+ /* tlbsx: search for the TLB entry matching vaddr */
+ asm volatile( "tlbsx %[rc],0,%[vaddr]"
+ : [rc] "=r" (rc)
+ : [vaddr] "r" (vaddr)
+ ) ;
+ return rc ;
+ }
+
+static void show_tlbs(unsigned int mioaddr) __attribute__((unused)) ;
+/* Debug helper: dump every TLB entry whose valid bit (0x00000200 in word 0)
+ * is set, then report which TLB slot translates the given address. */
+static void show_tlbs(unsigned int mioaddr)
+{
+ int slot = search_tlb(mioaddr) ;
+ int idx ;
+ for ( idx = 0 ; idx < 64 ; idx += 1 )
+ {
+ int word0 = get_tlb_pageid(idx) ;
+ int word1 = get_tlb_xlat(idx) ;
+ int word2 = get_tlb_attrib(idx) ;
+ if ( word0 & 0x00000200 )
+ {
+ printk(KERN_INFO "tlb[%02d]=[%08x %08x %08x]\n",idx,word0,word1,word2) ;
+ }
+ }
+ printk(KERN_INFO "mioaddr=0x%08x tlb_index=%d\n", mioaddr,slot) ;
+}
+
+/* char temp_packet[256] __attribute__ ((aligned ( 16))) ; */
+
+/*!
+ * \brief DMA Reception Fifo Shared Memory Structure
+ *
+ * This structure must be shared among the processors in a compute node. It
+ * contains info that must be maintained and shared for the duration of a job.
+ * This storage is static, maintained across function calls.
+ * In sharedmemory mode, core 0 maintains this info.
+ * In virtual node mode, each core maintains its own info.
+ *
+ */
+typedef struct DMA_RecFifoSharedMemory_t
+{
+ DMA_RecFifoRecvFunction_t recvFunctions[256]; /*!< The registered "normal"
+ reception fifo receive functions,
+ indexed by registration ID.
+ Filled in by calls to
+ DMA_RecFifoRegisterRecvFunction(). */
+
+ void *recvFunctionsParms[256]; /*!< recvFunctionsParms[i] is the
+ parameter to pass to
+ recvFunctions[i].
+ Filled in by calls to
+ DMA_RecFifoRegisterRecvFunction(). */
+
+ DMA_RecFifoRecvFunction_t headerRecvFunction; /*!< The registered "header"
+ reception fifo receive function.
+ Filled in by a call to
+ DMA_RecFifoRegisterRecvFunction(). */
+
+ void *headerRecvFunctionParm; /*!< The parameter to pass to
+ headerRecvFunction.
+ Filled in by a call to
+ DMA_RecFifoRegisterRecvFunction(). */
+
+ DMA_RecFifoRecvFunction_t errorRecvFunction; /*!< The registered "error"
+ reception fifo receive function.
+ Defaulted to
+ &DMA_RecFifoDefaultErrorRecvFunction.
+ Filled in by a call to
+ DMA_RecFifoRegisterRecvFunction(). */
+
+ void *errorRecvFunctionParm; /*!< The parameter to pass to
+ errorRecvFunction.
+ Filled in by a call to
+ DMA_RecFifoRegisterRecvFunction(). */
+
+ DMA_RecFifoGroup_t groups[DMA_NUM_REC_FIFO_GROUPS]; /*!< Reception fifo
+ group structures, one for each group.
+ groups[i] is the group shared by all
+ users of reception fifo group i. */
+
+ unsigned int groupsInitialized[DMA_NUM_REC_FIFO_GROUPS]; /*!< Indicator of
+ groups[i] having been initialized.
+ 0 = not initialized by a call to
+ DMA_RecFifoGetFifoGroup() for
+ group i.
+ 1 = initialized. */
+
+} DMA_RecFifoSharedMemory_t;
+
+
+/*!
+ * \brief Storage for the Reception Fifo Shared Memory Structure
+ *
+ * This storage is static, maintained across function calls.
+ * In sharedmemory mode, core 0 maintains reception fifo info.
+ * In virtual node mode, each core maintains its own reception fifo info.
+ */
+static DMA_RecFifoSharedMemory_t DMA_RecFifoInfo;
+
+
+/*!
+ * \brief DMA Packet I/O Vector Structure
+ *
+ * This structure describes the payload of a memory fifo packet.
+ * Because of fifo wrapping, the payload may consist of 0, 1, or 2 segments:
+ * - 0 segments: this is a packet in the header-only, debug fifo.
+ * - 1 segment: the packet does not wrap the fifo.
+ * - 2 segments: the packet does wrap the fifo.
+ *
+ */
+typedef struct DMA_PacketIovec_t
+{
+ int num_segments; /*!< Number of segments in the payload */
+ void *payload_ptr[2] ; /*!< Pointer to the payloads in each segment (NULL
+ if not used). */
+ int num_bytes[2]; /*!< Number of payload bytes in each segment (0 if
+ not used). */
+}
+ALIGN_L1D_CACHE DMA_PacketIovec_t;
+
+
+/* Dump 'length' bytes starting at 'address' to the kernel log as 32-byte
+ * lines of eight 32-bit words, bracketed by begin/end markers that include
+ * the label.
+ * NOTE(review): the loop rounds up to whole 32-byte lines, so up to 31 bytes
+ * beyond 'length' may be read; arithmetic on 'const void *' is a GCC
+ * extension (char-sized), and the cast to 'int *' drops const for the read.
+ */
+static void dumpmem(const void *address, unsigned int length, const char * label)
+ {
+ int x ;
+ printk(KERN_INFO "(>)[%s:%d] Memory dump: %s\n",__func__, __LINE__,label) ;
+ for (x=0;x<length;x+=32)
+ {
+ int *v = (int *)(address+x) ;
+ printk(KERN_INFO "%p: %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
+ ) ;
+ }
+ printk(KERN_INFO "(<)[%s:%d] Memory dump\n",__func__, __LINE__) ;
+ }
+
+
+
+/*!
+ * \brief Get DMA Reception Fifo Group
+ *
+ * This is a wrapper around a System Call. This function returns THE
+ * one-and-only pointer to the fifo group structure, with the entries all
+ * filled in from info in the DCRs. If called multiple times with the same
+ * group, it will always return the same pointer, and the system call will
+ * not be invoked again.
+ *
+ * It must be called AFTER DMA_RecFifoSetMap().
+ *
+ * By convention, the same "target" is used for normal and header fifo
+ * interrupts (could be changed). In addition, by convention, interrupts for
+ * fifos in group g come out of the DMA as non-fatal irq bit 28+g,
+ * ie, only fifos in group g can cause the "type g" threshold interrupts.
+ *
+ * \param[in] grp The group number (0 through DMA_NUM_REC_FIFO_GROUPS-1).
+ * \param[in] target The core that will receive the interrupt when a
+ * fifo in this group reaches its threshold
+ * (0 to DMA_NUM_REC_FIFO_GROUPS-1).
+ * Ignored on subsequent call with the same group.
+ * \param[in] normal_handler A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * normal fifo in this group reaches its threshold.
+ * This function must be coded to take 4 uint32_t
+ * parameters:
+ * - A pointer to storage specific to this
+ * handler. This is the normal_handler_parm
+ * specified on this function call.
+ * - 3 uint32_t parameters that are not used.
+ * If normal_handler is NULL, threshold interrupts
+ * are not delivered for normal fifos in this group.
+ * Ignored on subsequent call with the same group.
+ * \param[in] normal_handler_parm A pointer to storage that should be passed
+ * to the normal interrupt handling function
+ * (see normal_handler parameter).
+ * Ignored on subsequent call with the same
+ * group.
+ * \param[in] header_handler ** This parameter is deprecated. Specify NULL.**
+ * A pointer to the function to receive control in
+ * the I/O thread to handle the interrupt when a
+ * header fifo in this group reaches its threshold.
+ * This function must be coded to take 2 parameters:
+ * void* A pointer to storage specific to this
+ * handler. This is the header_handler_parm
+ * specified on this function call.
+ * int The global fifo ID of the fifo that hit
+ * its threshold (0 through
+ * NUM_DMA_REC_FIFOS-1).
+ * If header_handler is NULL, threshold interrupts
+ * are not delivered for header fifos in this group.
+ * Ignored on subsequent call with the same group.
+ * \param[in] header_handler_parm ** This parameter is deprecated. Specify
+ * NULL. **
+ * A pointer to storage that should be passed
+ * to the header interrupt handling function
+ * (see header_handler parameter).
+ * Ignored on subsequent call with the same
+ * group.
+ * \param[in] interruptGroup A InterruptGroup_t that identifies the
+ * group of interrupts that the fifos in this group
+ * will become part of.
+ * Ignored on subsequent call with the same group.
+ *
+ * \return RecFifoGroupStruct Pointer to a DMA Reception Fifo Group structure
+ * that reflects the fifos that are being used in
+ * this group. This same structure is shared by
+ * all users of this reception fifo group.
+ * NULL is returned if an error occurs.
+ *
+ * \note The following comments from Phil about the internals of the syscall:
+ * - error checks
+ * - 0 <= group_id < 4
+ * - the start of the fifo group is a valid virtual address (tlb mapped)?
+ * - disable the rDMA
+ * - call _BGP_rDMA_Fifo_Get_Map to get the DCR mapping information
+ * - loop through the map to determine how many and which fifos in this group
+ * are used, including headers
+ * - filling in the addresses of used fifos
+ * - In particular, any pointer to any fifo in the group that is not used
+ * will have a null pointer
+ * - furthermore,
+ * - write starting values to all used fifos
+ * - make sure all interrupts are cleared
+ * - enable rDMA
+ *
+ */
+DMA_RecFifoGroup_t *
+DMA_RecFifoGetFifoGroup(
+ int grp,
+ int target,
+ Kernel_CommThreadHandler normal_handler,
+ void *normal_handler_parm,
+ Kernel_CommThreadHandler header_handler,
+ void *header_handler_parm,
+ Kernel_InterruptGroup_t interruptGroup
+ )
+{
+ int rc;
+
+ TRACE((
+ KERN_INFO "(>) DMA_RecFifoGetFifoGroup\n"));
+
+ SPI_assert( (0 <= grp ) && (grp < DMA_NUM_REC_FIFO_GROUPS ) );
+ /* NOTE(review): target is a core number but is bounded by
+ * DMA_NUM_REC_FIFO_GROUPS -- presumably the two counts coincide on this
+ * hardware; confirm. */
+ SPI_assert( (0 <= target) && (target < DMA_NUM_REC_FIFO_GROUPS ) );
+
+ if ( DMA_RecFifoInfo.groupsInitialized[grp] == 0 ) /* Is */
+ /* DMA_RecFifoGroups[grp] not */
+ /* filled-in yet? */
+ {
+ /*
+ * If an interrupt handler has been specified, invoke the system call
+ * to configure the kernel to invoke the handler when the reception
+ * fifo threshold crossed interrupt fires.
+ *
+ * NOTE(review): the Kernel_RecFifoGetFifoGroup() call below is nested
+ * inside this "if (normal_handler)" block, so when normal_handler is
+ * NULL the group structure is never fetched, yet its address is still
+ * returned -- confirm callers always pass a handler.
+ */
+
+ if (normal_handler)
+ {
+ {
+ /*
+ * Calculate the IRQ to be one of
+ * - 28: rec fifo type 0 crossed threshold
+ * - 29: rec fifo type 1 crossed threshold
+ * - 30: rec fifo type 2 crossed threshold
+ * - 31: rec fifo type 3 crossed threshold
+ * based on the DMA group number.
+ */
+ unsigned irqInGroup = 28 + grp;
+/* tjcw ???? not sure what gets the right interrupts ... */
+/* 28+ gives something to do with memory tranfers. */
+/* we want 8+, which is related to FIFO fullness */
+/* unsigned irqInGroup = 8 + grp; */
+
+ /*
+ * Calculate an interrupt ID, which is the BIC interrupt group (2)
+ * combined with the IRQ number.
+ */
+/* int interruptID = Kernel_MkInterruptID(_BGP_IC_DMA_NFT_G2_HIER_POS, */
+/* irqInGroup); */
+ int interruptID = bic_hw_to_irq(_BGP_IC_DMA_NFT_G2_HIER_POS,irqInGroup);
+
+ /*
+ * Calculate the opcode indicating
+ * - the target core for interrupt
+ * - to call the specified function when the interrupt fires
+ * - to disable interrupts before calling the specified function
+ * - to enable interrupts after calling the specified function
+ */
+ int opcode = ( COMMTHRD_OPCODE_CORE0 + target ) |
+ COMMTHRD_OPCODE_CALLFUNC |
+ COMMTHRD_OPCODE_DISABLEINTONENTRY |
+ COMMTHRD_OPCODE_ENABLEINTONPOOF ;
+
+ /*
+ * Configure this interrupt with the kernel.
+ */
+ TRACE((
+ KERN_INFO "(=) DMA_RecFifoGetFifoGroup interruptID=%d\n",interruptID));
+ rc = Kernel_SetCommThreadConfig(interruptID,
+ opcode,
+ (uint32_t*)interruptGroup,
+ normal_handler,
+ (uint32_t)normal_handler_parm,
+ (uint32_t)NULL,
+ (uint32_t)NULL,
+ (uint32_t)NULL);
+ if (rc) return NULL;
+ }
+
+ /*
+ * Proceed to get the reception fifo group
+ */
+ rc = Kernel_RecFifoGetFifoGroup( (uint32_t*)&(DMA_RecFifoInfo.groups[grp]),
+ grp,
+ target,
+ (uint32_t) NULL, /* Normal handler. Not used */
+ (uint32_t) NULL, /* Normal handler parm. Not used */
+ (uint32_t) NULL, /* Header handler. Not used */
+ (uint32_t) NULL, /* Header handler parm. Not used */
+ (uint32_t) NULL /* InterruptGroup. Not used */
+ );
+ if ( rc == 0 ) /* Success? */
+ {
+ DMA_RecFifoInfo.groupsInitialized[grp] = 1; /* Remember success. */
+ }
+ else
+ {
+ return NULL; /* Failure */
+ }
+ }
+ }
+ TRACE((
+ KERN_INFO "(<) DMA_RecFifoGetFifoGroup\n"));
+
+ return &(DMA_RecFifoInfo.groups[grp]); /* Return the pointer. */
+
+}
+
+
+/*!
+ * \brief Register a Reception Fifo Receive Function
+ *
+ * Register a specified receive function to handle packets having a specific
+ * "registration ID". It returns a registration ID (0-255) that is to be used
+ * in the packet header Func_Id field, such that packets that arrive in a
+ * reception fifo will result in the corresponding receive function being called
+ * when that fifo is processed by a polling or interrupt handler function.
+ *
+ * \param[in] recv_func Pointer to the receive function.
+ * \param[in] recv_func_parm Arbitrary pointer to be passed to the
+ * recv_func when it is called.
+ * \param[in] is_error_function 1 means this is the receiver function
+ * to be called if a packet contains an invalid
+ * (unregistered) registration ID. The return
+ * value from this function is zero, indicating
+ * success, not indicating a registration ID.
+ * A default function is provided if one is not
+ * registered. If there is already a non-default
+ * error receive function registered, -EBUSY is
+ * returned.
+ * 0 means this is not the error receiver
+ * function.
+ * \param[in] is_header_fifo Indicates whether the fifo is normal or
+ * header:
+ * - 0 is normal. The return code is the
+ * registration ID.
+ * - 1 is header. The return code is 0,
+ * indicating success, because packets in
+ * header fifos are direct-put packets, and
+ * hence have no registration ID.
+ * If there is already a header receive function
+ * registered, -EBUSY is returned.
+ *
+ * If both is_error_function and is_header_fifo are 1, -EINVAL is returned.
+ *
+ * \retval 0 This is a registration ID if is_error_function=0 and
+ * is_header_fifo=0. Otherwise, it indicates success.
+ * 1-255 This is a registration ID. Successful.
+ * negative Failure. This is a negative errno value.
+ *
+ * \see DMA_RecFifoDeRegisterRecvFunction
+ */
+/* Starting index for the next "normal" registration-ID search. Advanced on
+ * each successful registration; reset to 0 by
+ * DMA_RecFifoDeRegisterRecvFunction() when a slot is freed. */
+static int DMA_RecFifoRegisterRecvFunction_next_free_ID = 0;
+int DMA_RecFifoRegisterRecvFunction(
+ DMA_RecFifoRecvFunction_t recv_func,
+ void *recv_func_parm,
+ int is_error_function,
+ int is_header_fifo
+ )
+{
+ int i;
+
+ /* Perform error checks */
+ if ( ( recv_func == NULL ) ||
+ ( ( is_error_function != 0 ) &&
+ ( is_error_function != 1 ) ) ||
+ ( ( is_header_fifo != 0 ) &&
+ ( is_header_fifo != 1 ) ) ||
+ ( ( is_header_fifo == 1 ) && ( is_error_function == 1 ) ) )
+ {
+ return -EINVAL;
+ }
+
+ /*
+ * Handle a "header" receive function.
+ */
+ if ( is_header_fifo == 1 )
+ {
+ if ( DMA_RecFifoInfo.headerRecvFunction != NULL ) /* Already registered?*/
+ {
+ return -EBUSY;
+ }
+ DMA_RecFifoInfo.headerRecvFunction = recv_func;
+ DMA_RecFifoInfo.headerRecvFunctionParm = recv_func_parm;
+ return 0; /* Indicate success */
+ }
+
+ /*
+ * Handle a "error" receive function.
+ */
+ if ( is_error_function == 1 )
+ {
+ if ( DMA_RecFifoInfo.errorRecvFunction !=
+ &DMA_RecFifoDefaultErrorRecvFunction ) /* Already registered? */
+ {
+ return -EBUSY;
+ }
+ DMA_RecFifoInfo.errorRecvFunction = recv_func;
+ DMA_RecFifoInfo.errorRecvFunctionParm = recv_func_parm;
+ return 0; /* Indicate success */
+ }
+
+ /*
+ * Handle a "normal" receive function: search for an empty slot, starting
+ * where the last successful registration was made.
+ */
+ for (i = DMA_RecFifoRegisterRecvFunction_next_free_ID; i < 256; i++)
+ {
+ if ( DMA_RecFifoInfo.recvFunctions[i] == NULL ) /* Found a slot? */
+ {
+ DMA_RecFifoInfo.recvFunctions[i] = recv_func;
+ DMA_RecFifoInfo.recvFunctionsParms[i] = recv_func_parm;
+ /* Bug fix: the original copied the static into a local and only
+ * wrote it back after the loop (the failure path), so the cached
+ * starting point never advanced. Record it before returning.
+ */
+ DMA_RecFifoRegisterRecvFunction_next_free_ID = i;
+ return i; /* Return the registration ID */
+ }
+ }
+
+ return -EBUSY; /* No open slots */
+
+}
+
+
+/*!
+ * \brief De-Register a Reception Fifo Receive Function
+ *
+ * De-register a previously registered receive function.
+ *
+ * \param[in] registrationId Registration Id returned from
+ * DMA_RecFifoRegisterRecvFunction (0..255).
+ * A negative value means that no
+ * registration id is specified.
+ * \param[in] is_error_function 1 means the error receive function is
+ * to be de-registered.
+ * 0 otherwise.
+ * \param[in] is_header_fifo 1 means the header fifo receive function is
+ * to be de-registered.
+ * 0 otherwise.
+ *
+ * \retval 0 Success
+ * negative Error value
+ *
+ * \see DMA_RecFifoRegisterRecvFunction
+ */
+int DMA_RecFifoDeRegisterRecvFunction(
+ int registrationId,
+ int is_error_function,
+ int is_header_fifo
+ )
+{
+ /* Reject out-of-range registration IDs and non-boolean flag values. */
+ if ( ( registrationId > 255 ) ||
+ ( ( is_error_function != 0 ) && ( is_error_function != 1 ) ) ||
+ ( ( is_header_fifo != 0 ) && ( is_header_fifo != 1 ) ) )
+ {
+ return -EINVAL;
+ }
+
+ if ( is_header_fifo )
+ {
+ /* Drop the "header" receive function and its parameter. */
+ DMA_RecFifoInfo.headerRecvFunction = NULL;
+ DMA_RecFifoInfo.headerRecvFunctionParm = NULL;
+ }
+
+ if ( is_error_function )
+ {
+ /* Drop the "error" receive function and its parameter. */
+ DMA_RecFifoInfo.errorRecvFunction = NULL;
+ DMA_RecFifoInfo.errorRecvFunctionParm = NULL;
+ }
+
+ if ( registrationId >= 0 )
+ {
+ /* Drop the "normal" receive function for this registration ID, and
+ * make the next registration search start from the beginning. */
+ DMA_RecFifoInfo.recvFunctions[registrationId] = NULL;
+ DMA_RecFifoInfo.recvFunctionsParms[registrationId] = NULL;
+ DMA_RecFifoRegisterRecvFunction_next_free_ID = 0;
+ }
+
+ return 0;
+}
+
+
+/*!
+ * \brief DMA Reception Fifo Default Error Receive Function
+ *
+ * This is the default function that will handle packets having an
+ * unregistered registration ID.
+ *
+ * \param[in] f_ptr Pointer to the reception fifo.
+ * \param[in] packet_ptr Pointer to the packet header (== va_head).
+ * This is quad-aligned for optimized copying.
+ * \param[in] recv_func_parm Pointer to storage specific to this receive
+ * function. This pointer was specified when the
+ * receive function was registered with the kernel,
+ * and is passed to the receive function
+ * unchanged.
+ * \param[in] payload_ptr Pointer to the beginning of the payload.
+ * This is quad-aligned for optimized copying.
+ * \param[in] payload_bytes Number of bytes in the payload
+ *
+ * \retval -1 An unregistered packet was just processed. This is considered
+ * an error.
+ */
+int DMA_RecFifoDefaultErrorRecvFunction(
+ DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ )
+{
+ int i;
+
+ printf ( "\nUnregistered Packet Received in Reception Fifo %d\n",
+ f_ptr->global_fifo_id);
+
+ printf ( "Packet Header:\n");
+ printf ( "%08x%08x%08x%08x\n",*((int*)&packet_ptr[0]),
+ *((int*)&packet_ptr[4]),
+ *((int*)&packet_ptr[8]),
+ *((int*)&packet_ptr[12]));
+ printf ( "Packet Payload:\n");
+
+ for (i=0; i<payload_bytes; i+=16);
+ {
+ printf ( "%08x%08x%08x%08x\n",*((int*)&payload_ptr[i]),
+ *((int*)&payload_ptr[i+4]),
+ *((int*)&payload_ptr[i+8]),
+ *((int*)&payload_ptr[i+12]));
+ }
+
+ SPI_assert(0);
+
+ return -1;
+}
+
+
+/*!
+ * \brief DMA Reception Fifo Get Addresses
+ *
+ * Analyze the packet at the head of the reception fifo and return a
+ * DMA_PacketIovec_t describing the payload of the packet. In particular,
+ * determine if the packet is contiguous in the fifo, or whether it wraps
+ * around to the start of the fifo.
+ *
+ * \param[in] f_ptr Pointer to the reception fifo structure.
+ * \param[in,out] io_vec Pointer to the packet I/O vector structure to
+ * be filled in.
+ *
+ * \return The io_vec structure has been filled-in.
+ *
+ * \pre The caller has determined that the fifo has a packet in it (it
+ * is not empty).
+ *
+ * \note
+ * - For non-header packets, only non-DMA packets (memory fifo packets)
+ * are in the fifo and need to be handled.
+ */
+void DMA_RecFifoGetAddresses(
+ DMA_RecFifo_t *f_ptr,
+ DMA_PacketIovec_t *io_vec
+ )
+{
+ DMA_PacketHeader_t *packet_ptr;
+ unsigned int payload_bytes;
+ unsigned int payload_bytes_to_end_of_fifo = 0;
+
+ SPI_assert( f_ptr != NULL );
+ SPI_assert( io_vec != NULL );
+
+ if ( f_ptr->global_fifo_id < DMA_NUM_NORMAL_REC_FIFOS ) /* Is this a */
+ /* normal fifo?*/
+ { /* Yes. Process a normal packet */
+ packet_ptr = (DMA_PacketHeader_t*)f_ptr->dma_fifo.va_head; /* Point */
+ /* to the packet. */
+
+ payload_bytes = ( (packet_ptr->Chunks + 1) << 5 ) -
+ sizeof(DMA_PacketHeader_t); /* Calculate payload bytes. */
+
+ io_vec->payload_ptr[0] =
+ (char*)packet_ptr +
+ sizeof(DMA_PacketHeader_t); /* Set first payload ptr */
+
+ /* Determine if the payload is contiguous in the fifo, and set up */
+ /* the iovec accordingly. */
+ if ( ( payload_bytes <= 16 ) || /* A 32-byte packet will always be */
+ /* contiguous...this is an */
+ /* optimization to avoid the next */
+ /* set of calculations. */
+ ( payload_bytes <= /* Calculate how much space to the */
+ ( payload_bytes_to_end_of_fifo = /* end of the fifo. */
+ ( (unsigned)f_ptr->dma_fifo.va_end - /* Check if entire */
+ (unsigned)io_vec->payload_ptr[0] ) ) ) ) /* payload fits.*/
+ {
+ /* Set up io_vec for contiguous payload */
+ io_vec->num_segments = 1; /* Indicate contiguous payload. */
+ io_vec->num_bytes[0] = payload_bytes;
+ io_vec->payload_ptr[1] = NULL;
+ io_vec->num_bytes[1] = 0;
+ return;
+ }
+ else
+ { /* Set up io_vec for non-contiguous payload. */
+
+ io_vec->num_segments = 2; /* Indicate split payload. */
+ io_vec->num_bytes[0] = payload_bytes_to_end_of_fifo;
+ io_vec->payload_ptr[1] = f_ptr->dma_fifo.va_start;
+ io_vec->num_bytes[1] = payload_bytes -
+ payload_bytes_to_end_of_fifo;
+ return;
+ }
+ } /* End: Non-header packet */
+
+ else /* Header packet. */
+
+ { /* Header packet */
+ io_vec->num_segments = 0; /* Indicate header fifo. */
+ io_vec->payload_ptr[0] = NULL; /* Everything else is NULL or zero. */
+ io_vec->payload_ptr[1] = NULL;
+ io_vec->num_bytes[0] = 0;
+ io_vec->num_bytes[1] = 0;
+ return;
+ }
+
+} /* End: DMA_RecFifoGetAddresses() */
+
+
+/*!
+ * \brief Get Index of Next Reception Fifo in Group
+ *
+ * A reception fifo group contains up to DMA_NUM_REC_FIFOS_PER_GROUP.
+ * It contains an array of fifos. Up to fg_ptr->num_normal_fifos normal
+ * fifos are in the first array slots. Up to 1 header fifo is in the
+ * last array slot.
+ *
+ * This function returns the array index of the next normal fifo in the group
+ * that is being used, based upon the desired fifo_index and the not-empty
+ * status.
+ *
+ * If *not_empty_status is -1, the status is fetched from the DMA SRAM (first
+ * time condition).
+ *
+ * If the DMA SRAM not-empty status for this group is all zero (all fifos are
+ * empty), the status is checked num_empty_passes times with a slight delay
+ * in between to give the DMA time to make progress before returning a -1,
+ * indicating that there is nothing more to process.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group
+ * \param[in] desired_fifo_index Index of the fifo that is desired to be
+ * processed.
+ * \param[in,out] fifo_bit Pointer to the bit in the not_empty_status
+ * that corresponds to the desired_fifo_index
+ * (on input) and the returned next_fifo_index
+ * (on output).
+ * \param[in] num_empty_passes When the not-empty status indicates that all
+ *                         fifos in the group are empty, this is the
+ * number of times the not-empty status is
+ * re-fetched and re-checked before officially
+ * declaring that they are indeed empty
+ * (0 means no extra passes are made).
+ * \param[in] not_empty_poll_delay The number of pclks to delay between polls
+ * of the not-empty status when the fifos are
+ * empty.
+ * \param[in,out] not_empty_status Pointer to the location to shadow the
+ * not empty status.
+ *
+ * \retval next_fifo_index Index of the next fifo in the group to be
+ * processed.
+ * \retval -1 Indicates that the normal fifos in the group are
+ * all empty.
+ *
+ * \post The va_tail of the fifo that is returned has been refreshed from
+ * the DMA hardware.
+ *
+ */
+__INLINE__ int DMA_RecFifoGetNextFifo(
+ DMA_RecFifoGroup_t *fg_ptr,
+ int desired_fifo_index,
+ unsigned int *fifo_bit,
+ int num_empty_passes,
+ int not_empty_poll_delay,
+ unsigned int *not_empty_status
+ )
+{
+ unsigned int status = *not_empty_status; /* Make a local copy */
+ unsigned int status_bit = *fifo_bit;
+ int fifo_index = desired_fifo_index;
+
+ /*
+ * If *not_empty_status is 0, either the status has not been fetched yet
+ * (first-time condition), or all fifos were emptied. Go fetch the
+ * not-empty status again.
+ */
+ if ( status == 0 )
+ {
+ status = DMA_RecFifoGetNotEmpty( fg_ptr,
+ 0 ); /* Get Normal fifo */
+ /* not-empty status. */
+ *not_empty_status = status; /* Return the status to the caller */
+
+#ifdef DEBUG_PRINT
+ printf("New notEmptyStatus1=0x%08x\n",*not_empty_status);
+#endif
+ }
+
+ /*
+ * If the DMA SRAM not-empty status for this group is all zero (all fifos are
+ * empty), the status is checked num_empty_passes times with a slight delay
+ * in between to give the DMA time to make progress before returning a -1,
+ * indicating that there is nothing more to process.
+ */
+ while ( ( status == 0 ) &&
+ ( num_empty_passes-- > 0 ) )
+ {
+ /* Delay, allowing the DMA to update its status */
+ unsigned int pclks = not_empty_poll_delay;
+ while( pclks-- )
+ {
+ asm volatile("nop;");
+ }
+
+ /* Re-fetch the not-empty status */
+ status = DMA_RecFifoGetNotEmpty( fg_ptr,
+ 0 ); /* Get Normal fifo */
+ /* not-empty status. */
+ *not_empty_status = status; /* Return the status to the caller */
+
+#ifdef DEBUG_PRINT
+ printf("New notEmptyStatus2=0x%08x\n",*not_empty_status);
+#endif
+ }
+
+ if ( status == 0 ) return (-1); /* Can't find any not empty */
+
+ /*
+ * We have some fifos that are not empty.
+ * Determine the fifo_index to be returned.
+ * Loop until we hit a non-empty fifo.
+ */
+#ifdef DEBUG_PRINT
+ printf("Checking status1 = 0x%08x for fifo_index %d, bit 0x%08x\n", status, fifo_index, status_bit);
+#endif
+
+ while ( ( status & status_bit ) == 0 )
+ {
+ fifo_index++; /* Try next fifo. */
+ if ( fifo_index >= fg_ptr->num_normal_fifos ) /* Wrap? */
+ fifo_index = 0; /* Start over with zero. */
+
+ status_bit = _BN(fg_ptr->fifos[fifo_index].global_fifo_id); /* Map to */
+ /* proper not-empty bit. */
+
+#ifdef DEBUG_PRINT
+ printf("Checking status2 = 0x%08x for fifo_index %d, bit 0x%08x\n", status, fifo_index, status_bit);
+#endif
+ }
+
+ /* Refresh the tail because the DMA may have moved it */
+ DMA_RecFifoGetTailById( fg_ptr,
+ fifo_index );
+
+ *fifo_bit = status_bit; /* Return the fifo index and its bit */
+
+#ifdef DEBUG_PRINT
+ printf("Returning fifo_index=%d, status bit 0x%08x\n",fifo_index,status_bit);
+#endif
+
+ return (fifo_index);
+
+} /* End: DMA_RecFifoGetNextFifo() */
+
+
+/*!
+ * \brief Poll Normal Reception Fifos
+ *
+ * Poll the "normal" reception fifos in the specified fifo group, removing one
+ * packet after another from the fifos, dispatching the appropriate receive
+ * function for each packet, until one of the following occurs:
+ * 1. Total_packets packets are received
+ * 2. All the fifos are empty
+ * 3. A receive function returns a non-zero value
+ * 4. The last packet removed from a fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ *
+ * Polling occurs in a round-robin fashion through the array of normal fifos in
+ * the group, beginning with array index starting_index. If a fifo has a packet,
+ * the appropriate receive function is called. Upon return, the packet is
+ * removed from the fifo (the fifo head is moved past the packet).
+ *
+ * After processing packets_per_fifo packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed. When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always be 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] total_packets The maximum number of packets that will be
+ * processed.
+ * \param[in] packets_per_fifo The maximum number of packets that will be
+ * processed in a given fifo before switching
+ * to the next fifo.
+ * \param[in] starting_index The fifos in the fifo group are maintained
+ * in an array. This is the array index of the
+ * first fifo to be processed (0 through
+ * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in] num_empty_passes The number of passes over the normal fifos
+ * while they are empty that this function
+ * should tolerate before giving up and
+ * returning. This is an optimization
+ * to catch late arriving packets.
+ * (0 means no extra passes are made).
+ * \param[in] not_empty_poll_delay The number of pclks to delay between polls
+ * of the not-empty status when the fifos are
+ * empty.
+ * \param[in] fg_ptr Pointer to the fifo group.
+ * \param[out] next_fifo_index Pointer to an int where the recommended
+ * starting_index for the next call is returned.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ * next_fifo_index is set.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end. next_fifo_index is
+ * set.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ *
+ * \note next_fifo_index is set to the index of the fifo that had the last
+ * packet received if all packets_per_fifo packets were not received from
+ * that fifo. However, if all packets_per_fifo packets were received
+ * from that fifo, the index of the next fifo will be returned.
+ *
+ */
int DMA_RecFifoPollNormalFifos(int total_packets,
                               int packets_per_fifo,
                               int starting_index,
                               int num_empty_passes,
                               int not_empty_poll_delay,
                               DMA_RecFifoGroup_t *fg_ptr,
                               int *next_fifo_index
                              )
{
  int fifo_index;                       /* Index of fifo being processed   */
  unsigned int fifo_bit_number;         /* The bit number of the fifo      */
                                        /* being processed.  Group0: 0-7,  */
                                        /* Group1: 8-15, Group2: 16-23,    */
                                        /* Group3: 24-31.  Corresponds to  */
                                        /* the DMA not-empty status bits.  */
  int num_fifos_in_group;               /* Number of fifos in this group.  */
  int num_packets_in_fifo;              /* Count of packets processed in a */
                                        /* fifo.                           */
  unsigned int not_empty_status=0;      /* Snapshot of the not empty status*/
                                        /* for this group.  0 indicates    */
                                        /* that no snapshot has occurred   */
                                        /* yet.                            */
  int rc = 0;                           /* Return code from recv_func.     */
  int num_processed = 0;                /* Number of packets processed     */
  DMA_PacketIovec_t io_vec;             /* Payload I/O vector              */
  DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function  */
  void *recv_func_parm;                 /* Receive function parameter      */
  int recv_func_id;                     /* Function ID from the packet     */
                                        /* header.                         */
  void *recv_func_payload;              /* Pointer to recv func payload    */
  void *recv_func_packet;               /* Pointer to recv func packet     */
  DMA_RecFifo_t *fifo_ptr;              /* Pointer to fifo being processed */
  char temp_packet[256] ALIGN_QUADWORD; /* Temporary packet copy.          */
                                        /* Align for efficient copying.    */
  char *load_ptr, *store_ptr;           /* Used for copying bytes          */
  int num_quads;                        /* Number of quads to copy         */
  DMA_PacketHeader_t *packet_ptr;       /* Pointer to packet header        */

  SPI_assert( total_packets > 0 );
  SPI_assert( packets_per_fifo > 0 );
  SPI_assert( packets_per_fifo <= total_packets );
  SPI_assert( num_empty_passes >= 0 );
  SPI_assert( fg_ptr != NULL );
  SPI_assert( next_fifo_index != NULL );
  SPI_assert( ( starting_index >= 0 ) &&
              ( starting_index < fg_ptr->num_normal_fifos ) );

  num_fifos_in_group = fg_ptr->num_normal_fifos;
  *next_fifo_index = starting_index;  /* Tell caller to start with the same */
                                      /* fifo next time.                    */
  fifo_index = starting_index;  /* Start with the fifo the caller says to*/

#ifdef DEBUG_PRINT
  int i;
  for (i=0; i<fg_ptr->num_normal_fifos; i++)
    printf("FifoIndex=%d <--> GlobalID=%d\n",i,fg_ptr->fifos[i].global_fifo_id);
#endif

  /*
   * Circularly loop through the not-empty fifos in the fifo group.
   * Keep going until one of the termination conditions documented in the
   * prolog occurs.
   *
   */
  for (;;)
    {
      /*
       * Find the next fifo to process.
       */
      fifo_ptr = &fg_ptr->fifos[fifo_index];          /* This is the fifo itself*/
      fifo_bit_number = _BN(fifo_ptr->global_fifo_id);/* The fifo's status bit*/

      fifo_index = DMA_RecFifoGetNextFifo(fg_ptr,
					  fifo_index,
					  &fifo_bit_number,
					  num_empty_passes,
					  not_empty_poll_delay,
					  &not_empty_status);
      if (fifo_index < 0) {    /* No more packets to process? */

#if defined(BGP_DD1_WORKAROUNDS)
	/*
	 * If there are no more non-empty fifos, count the number of consecutive
	 * times the poll function came up dry (num_processed == 0), and if it
	 * exceeds a threshold, issue a system call to clear the rDMA's "full
	 * reception fifo" condition so it begins to receive packets again.
	 *
	 * When a non-empty fifo is returned, its shadow va_tail pointer has been
	 * updated to reflect the amount of packet data in the fifo.
	 */
	if (num_processed > 0) {  /* Did we process at least 1 packet?  */
	  NumEmptyPollFunctionCalls = 0;  /* The DMA must be active.  It has    */
	                                  /* likely not encountered a fifo full */
	                                  /* condition and stopped.  Reset the  */
	                                  /* fifo counter so we will start      */
	                                  /* tracking empty calls to poll.      */
	}
	else {
	  if ( (NumEmptyPollFunctionCalls >= 0) &&  /* We are tracking empty calls? */
	       (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
	    /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
	    rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the      */
	                                          /* reception fifos filled and the */
	                                          /* DMA has stopped.               */
	    /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */
	    NumEmptyPollFunctionCalls = -1;  /* The DMA is active.  Reset the  */
	                                     /* fill-fifo counter.             */
	  }
	}
#endif
	/* printf("Poll: returned %d processed\n",num_processed); */
	return (num_processed);
      }

      *next_fifo_index = fifo_index;  /* Tell caller to start with this fifo */
                                      /* next time.                          */
      fifo_ptr = &(fg_ptr->fifos[fifo_index]);
      num_packets_in_fifo = 0;

      /*
       * MSYNC before we look at the data in the fifo to ensure that snoops
       * issued by the DMA have completed.  This ensures the L1 cache
       * invalidations have completed so we don't look at stale data.
       */
      _bgp_msync();

      /*
       * Within a fifo:  The area between the va_head and va_tail shadow pointers
       * contains packets to be processed.  Loop, processing those packets until
       * we have processed packets_per_fifo of them, or all of them, or other
       * issues come up.
       *
       */
#if defined(CONFIG_BGP_STATISTICS)
      {
	/* Record fifo occupancy (in 16-byte quads) in a log2 histogram.   */
	unsigned int used_space = (fifo_ptr->dma_fifo.va_tail >= fifo_ptr->dma_fifo.va_head)
	  ? ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 )
	  : (fifo_ptr->dma_fifo.fifo_size + ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) )
	  ;
	reception_fifo_histogram[fls(used_space)] += 1 ;
      }
#endif
      while ( ( num_packets_in_fifo < packets_per_fifo ) &&
	      ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) )
	{
	  DMA_RecFifoGetAddresses( fifo_ptr,
				   &io_vec );  /* Get the payload pointer(s) */
	                                       /* for the packet at the head */
	                                       /* of the fifo.               */

	  packet_ptr = (DMA_PacketHeader_t*)
	                 fifo_ptr->dma_fifo.va_head;  /* Point to packet header*/

#ifdef DEBUG_PRINT
	  printf("ReceivedPacketHead = 0x%08x\n",(unsigned)packet_ptr);
	  printf("ReceivedPacketIovec= 0x%08x %d, 0x%08x %d\n",
		 (unsigned)io_vec.payload_ptr[0], io_vec.num_bytes[0],
		 (unsigned)io_vec.payload_ptr[1], io_vec.num_bytes[1]);
#endif
	  /*
	   * Determine the receive function to call.  Index into
	   * recvFunctions array is in the packet header.
	   * NOTE(review): Func_Id selects from the 256-entry table filled in
	   * by DMA_RecFifoRegisterRecvFunction; an unregistered id falls
	   * through to the error receive function below.
	   */
	  recv_func_id  = packet_ptr->Func_Id;
	  recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id];
	  if ( recv_func_ptr != NULL )
	    {
	      recv_func_parm =
		DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
	    }
	  else
	    {
	      recv_func_ptr  = DMA_RecFifoInfo.errorRecvFunction;
	      recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
	    }
	  /*
	   * Use a temporary copy of the packet, when the payload
	   * wraps.
	   * NOTE(review): this assumes a wrapped packet (header + payload)
	   * never exceeds the 256-byte temp_packet buffer -- presumably
	   * guaranteed by the maximum packet size; confirm against the
	   * DMA packet format.
	   */
	  if ( io_vec.num_segments > 1 )
	    {
#ifdef DEBUG_PRINT
	      printf("Payload Wraps:  Packet Header: 0x%08x, Iovecs: 0x%08x %d, 0x%08x %d\n",
		     (unsigned)packet_ptr,
		     (unsigned)io_vec.payload_ptr[0], io_vec.num_bytes[0],
		     (unsigned)io_vec.payload_ptr[1], io_vec.num_bytes[1]);
#endif

	      /* Copy packet header and first payload segment */
	      load_ptr  = (char*)packet_ptr;
	      store_ptr = temp_packet;
	      num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4;
	      while ( num_quads > 0 )
		{
#ifdef DEBUG_PRINT
		  printf("load_ptr =0x%08x, load_value =0x%08x%08x%08x%08x\n",
			 (unsigned)load_ptr, *(unsigned*)load_ptr, *(unsigned*)(load_ptr+4),
			 *(unsigned*)(load_ptr+8), *(unsigned*)(load_ptr+12));
#endif
		  _bgp_QuadLoad ( load_ptr, 0 );

		  _bgp_QuadStore( store_ptr, 0 );
#ifdef DEBUG_PRINT
		  printf("store_ptr=0x%08x, store_value=0x%08x%08x%08x%08x\n",
			 (unsigned)store_ptr, *(unsigned*)store_ptr, *(unsigned*)(store_ptr+4),
			 *(unsigned*)(store_ptr+8), *(unsigned*)(store_ptr+12));
#endif

		  load_ptr  += 16;
		  store_ptr += 16;
		  num_quads--;
		}
	      /* Copy second payload segment */
	      load_ptr  = (char*)io_vec.payload_ptr[1];
	      num_quads = io_vec.num_bytes[1] >> 4;
	      while ( num_quads > 0 )
		{
#ifdef DEBUG_PRINT
		  printf("load_ptr =0x%08x, load_value =0x%08x%08x%08x%08x\n",
			 (unsigned)load_ptr, *(unsigned*)load_ptr, *(unsigned*)(load_ptr+4),
			 *(unsigned*)(load_ptr+8), *(unsigned*)(load_ptr+12));
#endif
		  _bgp_QuadLoad ( load_ptr, 0 );

		  _bgp_QuadStore( store_ptr, 0 );
#ifdef DEBUG_PRINT
		  printf("store_ptr=0x%08x, store_value=0x%08x%08x%08x%08x\n",
			 (unsigned)store_ptr, *(unsigned*)store_ptr, *(unsigned*)(store_ptr+4),
			 *(unsigned*)(store_ptr+8), *(unsigned*)(store_ptr+12));
#endif
		  load_ptr  += 16;
		  store_ptr += 16;
		  num_quads--;
		}
	      recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t);
	      recv_func_packet  = temp_packet;

	    } /* End: Set up temporary copy of split packet */

	  else /* Set up for contiguous packet */
	    {
	      recv_func_payload = (char*)packet_ptr +
		                  sizeof(DMA_PacketHeader_t);
	      recv_func_packet  = packet_ptr;
	    }

	  /* Call the receive function.
	   * NOTE(review): if recv_func_ptr is NULL here (no registered
	   * function AND no error function), the packet is logged and
	   * skipped, and rc keeps its value from the previous iteration
	   * (0 initially) -- the skip is not itself reported as an error.
	   */
	  if( recv_func_ptr )
	    {
	      rc = (*recv_func_ptr)(fifo_ptr,
				    recv_func_packet,
				    recv_func_parm,
				    recv_func_payload,
				    io_vec.num_bytes[0]+io_vec.num_bytes[1]);
	    }
	  else
	    {
	      printk(KERN_ERR "DMA_RecFifoPollNormalFifos recv_func_ptr was NULL recv_func_id=%02x fifo_ptr=%p recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
		     recv_func_id,fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ;
	    }

	  /* Increment the head by the size of the packet.
	   * The head advances in units of 16-byte quads, hence the >>4.  */
	  DMA_RecFifoIncrementHead(fifo_ptr,
				   (io_vec.num_bytes[0]+
				    io_vec.num_bytes[1] +
				    sizeof(DMA_PacketHeader_t))>> 4);

	  num_processed++;

	  if ( rc != 0 )  /* Did receive function fail? */
	    {
#if defined(BGP_DD1_WORKAROUNDS)
	      NumEmptyPollFunctionCalls = 0;  /* The DMA must be active.  It has    */
	                                      /* likely not encountered a fifo full */
	                                      /* condition and stopped.  Reset the  */
	                                      /* fifo counter so we will start      */
	                                      /* tracking empty calls to poll.      */
#endif
	      /* Clear the threshold crossed condition, in case we have gone below
	       * the threshold.
	       */
	      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
						   fifo_bit_number,
						   0 );
	      return (rc);  /* Yes...return that return code */
	    }

	  if ( num_processed >= total_packets )  /* Got what they wanted? */
	    {
#if defined(BGP_DD1_WORKAROUNDS)
	      NumEmptyPollFunctionCalls = 0;  /* The DMA must be active.  It has    */
	                                      /* likely not encountered a fifo full */
	                                      /* condition and stopped.  Reset the  */
	                                      /* fifo counter so we will start      */
	                                      /* tracking empty calls to poll.      */
#endif
	      /* Clear the threshold crossed condition, in case we have gone below
	       * the threshold.
	       */
	      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
						   fifo_bit_number,
						   0 );
	      return (num_processed);  /* Yes...all done */
	    }

	  num_packets_in_fifo++;

	} /* End: Process up to packets_per_fifo packets in this fifo */

      /*
       * We exited the loop processing the fifo_index fifo.
       * - If we exited because we reached the packets_per_fifo limit, we want
       *   to turn off this fifo's not-empty status in our shadow copy of the
       *   status so we process all of the other fifos before re-fetching the
       *   true status, giving this fifo another chance.
       * - If we exited because the fifo was empty according to our snapshot
       *   of the fifo's tail (head == tail snapshot), we want to turn off this
       *   fifo's not-empty status in our shadow copy of the status so we
       *   process all of the other fifos before re-fetching the true status and
       *   tail for this fifo, giving this fifo another chance.
       * Either way, we turn off the status bit.
       *
       */
      not_empty_status &= ~(fifo_bit_number);

      /* Clear the threshold crossed condition, in case we have gone below
       * the threshold.
       */
      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
					   fifo_bit_number,
					   0 );

#ifdef DEBUG_PRINT
      printf("PollNormal:  Turning off status bit 0x%08x, status=0x%08x\n",fifo_bit_number,not_empty_status);
#endif

      /* Bump to next fifo */
      fifo_index = (fifo_index+1) % num_fifos_in_group;

      /*
       * If we have processed the max number of packets from the previous fifo,
       * the recommended next fifo to process is the one after that.
       *
       */
      if ( num_packets_in_fifo == packets_per_fifo )
	{
	  *next_fifo_index = fifo_index;
	}

    } /* End: Keep looping through the fifos. */

} /* End: DMA_RecFifoPollNormalFifos() */
+
+
+
+
+
/* File-scope counter for memory-dump debug output.
 * NOTE(review): not referenced anywhere in this portion of the file --
 * presumably used by a dump helper further down; confirm before removing. */
static int dumpmem_count ;
+
/* Copy one 16-byte quadword from src to dest as four 32-bit words.
 * All four words are loaded before any store, so dest == src is harmless.
 * Both pointers must be 4-byte aligned. */
static inline void quadcpy(void *dest, const void *src)
{
  unsigned int       *to   = (unsigned int *) dest ;
  const unsigned int *from = (const unsigned int *) src ;
  unsigned int tmp[4] ;
  int i ;

  for ( i = 0; i < 4; i++ )   /* Load the whole quad first... */
    tmp[i] = from[i] ;
  for ( i = 0; i < 4; i++ )   /* ...then store it.            */
    to[i] = tmp[i] ;
}
+/*!
+ * \brief Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1. num_packets packets are received
+ * 2. The specified fifo is empty
+ * 3. A receive function returns a non-zero value
+ * 4. The last packet removed from the fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always be 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] num_packets The maximum number of packets that will be
+ * processed.
+ * \param[in] fifo_id The ID of the fifo to be polled.
+ * (0 through
+ * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in] num_empty_passes When the not-empty status indicates that all
+ *                         fifos in the group are empty, this is the
+ * number of times the not-empty status is
+ * re-fetched and re-checked before officially
+ * declaring that they are indeed empty.
+ * (0 means no extra passes are made).
+ * \param[in] not_empty_poll_delay The number of pclks to delay between polls
+ * of the not-empty status when the fifos are
+ * empty.
+ * \param[in] fg_ptr Pointer to the fifo group.
+ *
+ * \param[in] empty_callback Function to call when spinning because the FIFO looks empty.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ *
+ */
+int DMA_RecFifoPollNormalFifoById( int num_packets,
+ int fifo_id,
+ int num_empty_passes,
+ int not_empty_poll_delay,
+ DMA_RecFifoGroup_t *fg_ptr,
+ void (*empty_callback)(void)
+ )
+{
+ int num_packets_in_fifo; /* Count of packets processed in a */
+ /* fifo. */
+ unsigned int status; /* Snapshot of the not empty status*/
+ /* for this group. */
+ int rc = 0; /* Return code from recv_func. */
+ int num_processed = 0; /* Number of packets processed */
+ DMA_PacketIovec_t io_vec; /* Payload I/O vector */
+ DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function */
+ void *recv_func_parm;/* Receive function parameter */
+ int recv_func_id; /* Function ID from the packet */
+ /* header. */
+ void *recv_func_payload;/* Pointer to recv func payload */
+ void *recv_func_packet; /* Pointer to recv func packet */
+ DMA_RecFifo_t *fifo_ptr; /* Pointer to fifo being processed */
+ char temp_packet[256] ALIGN_QUADWORD; /* Temporary packet copy. */
+ /* Align for efficient copying. */
+ char *load_ptr, *store_ptr; /* Used for copying bytes */
+ int num_quads; /* Number of quads to copy */
+ DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */
+ int passes; /* Counter of not-empty passes */
+
+ SPI_assert( num_packets > 0 );
+ SPI_assert( num_empty_passes >= 0 );
+ SPI_assert( fg_ptr != NULL );
+ SPI_assert( ( fifo_id >= 0 ) &&
+ ( fifo_id < DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP ) );
+
+ fifo_ptr = &(fg_ptr->fifos[fifo_id]);
+
+ /*
+ * Loop until the specified fifo is declared empty, or
+ * until one of the termination conditions documented in the prolog occurs.
+ *
+ */
+ for (;;)
+ {
+ /*
+ * If the DMA SRAM not-empty status for this fifo is zero (the fifo is
+ * empty), the status is checked num_empty_passes times with a slight
+ * delay in between to give the DMA time to make progress before declaring
+ * that the fifo is truely empty.
+ */
+ passes = num_empty_passes;
+ status = DMA_RecFifoGetNotEmptyById( fg_ptr,
+ fifo_id ); /* Get Normal fifo */
+ /* not-empty status. */
+ while ( ( status == 0 ) &&
+ ( num_empty_passes-- > 0 ) )
+ {
+ /* Delay, allowing the DMA to update its status */
+ unsigned int pclks = not_empty_poll_delay;
+ (*empty_callback)() ;
+ while( pclks-- )
+ {
+ asm volatile("nop;");
+ }
+
+ /* Re-fetch the not-empty status */
+ status = DMA_RecFifoGetNotEmptyById(
+ fg_ptr,
+ fifo_id ); /* Get Normal fifo */
+ /* not-empty status.*/
+ }
+
+ if ( status == 0 ) { /* Fifo is empty? */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+ if (num_processed > 0) { /* Did we process at least 1 packet? */
+ NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */
+ /* likely not encountered a fifo full */
+ /* condition and stopped. Reset the */
+ /* fifo counter so we will start */
+ /* tracking empty calls to poll. */
+ /* Clear the threshold crossed condition, in case we have gone below
+ * the threshold.
+ */
+ DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+ _BN(fifo_ptr->global_fifo_id),
+ 0 );
+ }
+ else {
+ if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+ (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+ /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+ rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+ /* reception fifos filled and the */
+ /* DMA has stopped. */
+ /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+ NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */
+ /* fill-fifo counter. */
+ }
+ }
+#endif
+
+ return (num_processed);
+ }
+
+ /* The fifo has something in it.
+ * Update its shadow va_tail pointer to reflect the amount of packet
+ * data in the fifo.
+ */
+ DMA_RecFifoGetTailById( fg_ptr,
+ fifo_id );
+
+ num_packets_in_fifo = 0;
+
+ /*
+ * MSYNC before we look at the data in the fifo to ensure that snoops
+ * issued by the DMA have completed. This ensures the L1 cache
+ * invalidations have completed so we don't look at stale data.
+ */
+ _bgp_msync();
+
+ /*
+ * Within a fifo: The area between the va_head and va_tail shadow pointers
+ * contains packets to be processed. Loop, processing those packets until
+ * we have processed packets_per_fifo of them, or all of them, or other
+ * issues come up.
+ *
+ */
+#if defined(CONFIG_BGP_STATISTICS)
+ {
+ unsigned int tail = (unsigned int) fifo_ptr->dma_fifo.va_tail ;
+ unsigned int head = (unsigned int) fifo_ptr->dma_fifo.va_head ;
+ unsigned int end = (unsigned int) fifo_ptr->dma_fifo.va_end ;
+ unsigned int start = (unsigned int) fifo_ptr->dma_fifo.va_start ;
+ unsigned int used_space = ( tail >= head ) ? (tail-head) : ((tail-start)+(end-head)) ;
+ reception_fifo_histogram[fls(used_space >> 4)] += 1 ;
+ if( used_space > reception_hi_watermark ) reception_hi_watermark = used_space ;
+
+/* unsigned int used_space = (fifo_ptr->dma_fifo.va_tail >= fifo_ptr->dma_fifo.va_head) */
+/* ? ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) */
+/* : (fifo_ptr->dma_fifo.fifo_size + ( ((unsigned)(fifo_ptr->dma_fifo.va_tail) - (unsigned)(fifo_ptr->dma_fifo.va_head)) >> 4 ) ) */
+/* ; */
+/* reception_fifo_histogram[fls(used_space)] += 1 ; */
+ }
+#endif
+ while ( ( num_packets_in_fifo < num_packets ) &&
+ ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) )
+ {
+ DMA_RecFifoGetAddresses( fifo_ptr,
+ &io_vec ); /* Get the payload pointer(s) */
+ /* for the packet at the head */
+ /* of the fifo. */
+
+ packet_ptr = (DMA_PacketHeader_t*)
+ fifo_ptr->dma_fifo.va_head; /* Point to packet header*/
+ /*
+ * Determine the receive function to call. Index into
+ * recvFunctions array is in the packet header.
+ */
+ recv_func_id = packet_ptr->Func_Id;
+ recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id];
+ if ( recv_func_ptr != NULL )
+ {
+ recv_func_parm =
+ DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
+ }
+ else
+ {
+ recv_func_ptr = DMA_RecFifoInfo.errorRecvFunction;
+ recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+ }
+ /*
+ * Use a temporary copy of the packet, when the payload
+ * wraps.
+ */
+ if ( io_vec.num_segments > 1 )
+ {
+ /* Copy packet header and first payload segment */
+ load_ptr = (char*)packet_ptr;
+ store_ptr = temp_packet;
+ num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4;
+ while ( num_quads > 0 )
+ {
+ /* Don't bother doing this via doublehummer; it only happens 'occasionally' and means the caller has to enable for floating-point */
+ quadcpy(store_ptr,load_ptr) ;
+/* _bgp_QuadLoad ( load_ptr, 0 ); */
+/* _bgp_QuadStore( store_ptr, 0 ); */
+ load_ptr += 16;
+ store_ptr += 16;
+ num_quads--;
+ }
+ /* Copy second payload segment */
+ load_ptr = (char*)io_vec.payload_ptr[1];
+ num_quads = io_vec.num_bytes[1] >> 4;
+ while ( num_quads > 0 )
+ {
+ quadcpy(store_ptr,load_ptr) ;
+/* _bgp_QuadLoad ( load_ptr, 0 ); */
+/* _bgp_QuadStore( store_ptr, 0 ); */
+ load_ptr += 16;
+ store_ptr += 16;
+ num_quads --;
+ }
+ recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t);
+ recv_func_packet = temp_packet;
+
+ } /* End: Set up temporary copy of split packet */
+
+ else /* Set up for contiguous packet */
+ {
+ recv_func_payload = (char*)packet_ptr +
+ sizeof(DMA_PacketHeader_t);
+ recv_func_packet = packet_ptr;
+ }
+
+ /* Call the receive function */
+ if( recv_func_ptr )
+ {
+/* dumpmem(recv_func_packet-32, 128, "Software FIFO around call") ; */
+ rc = (*recv_func_ptr)(fifo_ptr,
+ recv_func_packet,
+ recv_func_parm,
+ recv_func_payload,
+ io_vec.num_bytes[0]+io_vec.num_bytes[1]);
+ }
+ else
+ {
+ printk(KERN_ERR "DMA_RecFifoPollNormalFifoById recv_func_ptr was NULL recv_func_id=%02x fifo_ptr=%p recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+ recv_func_id,fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ;
+ if( dumpmem_count < 10 )
+ {
+ dumpmem(recv_func_packet-256, 512, "Software FIFO around misread") ;
+ dumpmem_count += 1 ;
+ }
+/* show_tlbs((unsigned int) recv_func_packet) ; */
+/* (void)dma_map_single(NULL,recv_func_packet-32, 128,DMA_FROM_DEVICE) ; */
+/* dumpmem(recv_func_packet-32, 128, "Software FIFO around misread after cache discard") ; */
+ }
+
+ /* Increment the head by the size of the packet */
+ DMA_RecFifoIncrementHead(fifo_ptr,
+ (io_vec.num_bytes[0]+
+ io_vec.num_bytes[1] +
+ sizeof(DMA_PacketHeader_t))>> 4);
+
+ num_processed++;
+
+ if ( rc != 0 ) /* Did receive function fail? */
+ {
+ /* Clear the threshold crossed condition, in case we have gone below
+ * the threshold.
+ */
+ DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+ _BN(fifo_ptr->global_fifo_id),
+ 0 );
+ return (rc); /* Yes...return that return code */
+ }
+
+ if ( num_processed >= num_packets ) /* Got what they wanted? */
+ {
+ /* Clear the threshold crossed condition, in case we have gone below
+ * the threshold.
+ */
+ DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+ _BN(fifo_ptr->global_fifo_id),
+ 0 );
+ return (num_processed); /* Yes...all done */
+ }
+
+ num_packets_in_fifo++;
+
+ } /* End: Process up to packets_per_fifo packets in this fifo */
+
+ } /* End: Keep looping through the fifo. */
+
+} /* End: DMA_RecFifoPollNormalFifoById() */
+
+
+
+
+/*!
+ *
+ * \brief Prime Receive Function Cache for Polling Function
+ *
+ * The reception fifo polling code keeps a one-entry cache of the last
+ * receive function dispatched.  This helper refreshes that cache for a
+ * given function ID, substituting the registered error receive function
+ * when no receive function is registered under that ID.
+ *
+ * \param [in]  recv_func_id   The function ID whose receive function info
+ *                             is to be returned.
+ * \param [out] recv_func_ptr  Pointer to the receive function's address,
+ *                             returned by this function.
+ * \param [out] recv_func_parm Pointer to the receive function's parameter.
+ *
+ * \return The information (function pointer and function parameter) for the
+ *         specified receive function is returned as described.
+ */
+inline
+void DMA_RecFifoPollPrimeRecvFuncCache( int recv_func_id,
+                                        DMA_RecFifoRecvFunction_t *recv_func_ptr,
+                                        void **recv_func_parm )
+{
+  DMA_RecFifoRecvFunction_t func = DMA_RecFifoInfo.recvFunctions[recv_func_id];
+  void                     *parm;
+
+  if ( func == NULL ) {
+    /* Nothing registered under this ID: fall back to the error handler. */
+    func = DMA_RecFifoInfo.errorRecvFunction;
+    parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+  }
+  else {
+    parm = DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
+  }
+
+  *recv_func_ptr  = func;
+  *recv_func_parm = parm;
+
+} /* End: DMA_RecFifoPollPrimeRecvFuncCache() */
+
+
+
+
+/*!
+ *
+ * \brief Process a Wrap of a Reception Fifo While Polling
+ *
+ * This function is meant to be called by a polling function that has processed
+ * packets in a reception fifo such that there are just a few left to be
+ * processed before it hits the end of the fifo and wraps. This function
+ * processes those packets at the end of the fifo until the wrap occurs,
+ * and then returns, leaving the rest of the packets in the fifo to be
+ * processed by the calling function.
+ *
+ * \param[in] rec_fifo_ptr Pointer to reception fifo
+ * \param[in,out] va_head Pointer to the fifo's virtual address
+ * head. Updated by this function.
+ * \param[in,out] va_tail Pointer to the fifo's virtual address
+ * tail. Updated by this function.
+ * \param[in,out] num_processed Pointer to the number of packets
+ * processed by the calling poll
+ * function. Updated by this function.
+ * \param[in,out] num_processed_in_fifo Pointer to the number of packets
+ * in this particular fifo processed
+ * by the calling poll function.
+ * Updated by this function.
+ * \param[in] max_num_packets The max number of packets that can be
+ * processed before the poll function
+ * must return.
+ * \param[in] max_num_packets_in_fifo The max number of packets that can be
+ * processed in this fifo.
+ *
+ * \retval 0 Processing complete successfully. Output
+ * parameters have been updated as described.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ */
+
+int DMA_RecFifoPollProcessWrap ( DMA_RecFifo_t *rec_fifo_ptr,
+ void **va_head,
+ void *va_tail,
+ /* NOTE(review): va_tail is never read in this body; the wrap is detected
+  * by the head pointer moving backward (see the exit test below).  The
+  * prolog says va_tail is "updated by this function" — confirm intent. */
+ int *num_processed,
+ int *num_processed_in_fifo,
+ int max_num_packets,
+ int max_num_packets_in_fifo) {
+  int rc = 0;
+  DMA_PacketIovec_t io_vec; /* Payload I/O vector */
+  DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */
+  DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function */
+  void *recv_func_parm;/* Receive function parameter */
+  int recv_func_id; /* Function ID from the packet */
+ /* header. */
+  void *recv_func_payload;/* Pointer to recv func payload */
+  void *recv_func_packet; /* Pointer to recv func packet */
+  char temp_packet[256] ALIGN_QUADWORD; /* Temporary packet copy. */
+ /* Align for efficient copying. */
+  char *load_ptr, *store_ptr; /* Used for copying bytes */
+  int num_quads; /* Number of quads to copy */
+
+  /* Process packets one at a time until the fifo head wraps to the start of
+   * the fifo, a receive function fails (rc != 0), or a packet-count limit is
+   * reached.  The caller's head/packet counters are refreshed each pass. */
+  while ( rc == 0 ) { /* Loop while things are good until we exit after */
+ /* processing the wrap. */
+
+    DMA_RecFifoGetAddresses( rec_fifo_ptr,
+ &io_vec ); /* Get the payload pointer(s) */
+ /* for the packet at the head */
+ /* of the fifo. */
+
+    packet_ptr = (DMA_PacketHeader_t*)
+ rec_fifo_ptr->dma_fifo.va_head; /* Point to packet header */
+
+    /*
+     * Determine the receive function to call. Index into
+     * recvFunctions array is in the packet header.
+     */
+    recv_func_id = packet_ptr->Func_Id;
+    recv_func_ptr = DMA_RecFifoInfo.recvFunctions[recv_func_id];
+    if ( recv_func_ptr != NULL )
+      {
+ recv_func_parm =
+ DMA_RecFifoInfo.recvFunctionsParms[recv_func_id];
+      }
+    else
+      {
+ recv_func_ptr = DMA_RecFifoInfo.errorRecvFunction;
+ recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+      }
+    /*
+     * Use a temporary copy of the packet, when the payload
+     * wraps.
+     */
+    if ( io_vec.num_segments > 1 )
+      {
+ /* NOTE(review): _bgp_QuadLoad/_bgp_QuadStore move data through FP quad
+  * registers, so the caller must have floating-point enabled here.  The
+  * equivalent copy in the ById poll function uses quadcpy() precisely to
+  * avoid that requirement — confirm this difference is intentional. */
+ /* Copy packet header and first payload segment */
+ load_ptr = (char*)packet_ptr;
+ store_ptr = temp_packet;
+ num_quads = (sizeof(DMA_PacketHeader_t) + io_vec.num_bytes[0]) >> 4;
+ while ( num_quads > 0 )
+ {
+ _bgp_QuadLoad ( load_ptr, 0 );
+ _bgp_QuadStore( store_ptr, 0 );
+ load_ptr += 16;
+ store_ptr += 16;
+ num_quads --;
+ }
+ /* Copy second payload segment */
+ load_ptr = (char*)io_vec.payload_ptr[1];
+ num_quads = io_vec.num_bytes[1] >> 4;
+ while ( num_quads > 0 )
+ {
+ _bgp_QuadLoad ( load_ptr, 0 );
+ _bgp_QuadStore( store_ptr, 0 );
+ load_ptr += 16;
+ store_ptr += 16;
+ num_quads --;
+ }
+ recv_func_payload = temp_packet + sizeof(DMA_PacketHeader_t);
+ recv_func_packet = temp_packet;
+
+      } /* End: Set up temporary copy of split packet */
+
+    else /* Set up for contiguous packet */
+      {
+ recv_func_payload = (char*)packet_ptr +
+ sizeof(DMA_PacketHeader_t);
+ recv_func_packet = packet_ptr;
+      }
+
+    /* Call the receive function */
+    if( recv_func_ptr)
+      {
+ rc = (*recv_func_ptr)(rec_fifo_ptr,
+ recv_func_packet,
+ recv_func_parm,
+ recv_func_payload,
+ io_vec.num_bytes[0]+io_vec.num_bytes[1]);
+      }
+    else
+      {
+      /* Both the registered entry and the error handler were NULL:
+       * log it and drop the packet (the head is still advanced below). */
+      printk(KERN_ERR "DMA_RecFifoPollProcessWrap recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p recv_func_packet=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+      recv_func_id,rec_fifo_ptr,recv_func_packet,recv_func_parm,recv_func_payload,io_vec.num_bytes[0]+io_vec.num_bytes[1]) ;
+
+      }
+
+    /* Increment the head by the size of the packet */
+    DMA_RecFifoIncrementHead(rec_fifo_ptr,
+ (io_vec.num_bytes[0]+
+ io_vec.num_bytes[1] +
+ sizeof(DMA_PacketHeader_t))>> 4);
+    *va_head = rec_fifo_ptr->dma_fifo.va_head; /* Refresh caller's head */
+
+    (*num_processed)++;
+    (*num_processed_in_fifo)++;
+
+#ifdef DEBUG_PRINT
+    printf("PollWrap: num_processed=%d, va_head=0x%08x, Part1Len=%d, Part2Len=%d, Part1Ptr=0x%08x, Part2Ptr=0x%08x\n",*num_processed,(unsigned)*va_head,io_vec.num_bytes[0],io_vec.num_bytes[1],(unsigned)io_vec.payload_ptr[0],(unsigned)io_vec.payload_ptr[1]);
+#endif
+
+    /* If the head address decreased, the increment above wrapped the fifo:
+     * our job is done and the caller resumes normal (contiguous) processing.
+     * NOTE(review): the per-fifo limit uses '>' while the overall limit uses
+     * '>=' — confirm this asymmetry is intended. */
+    if ( ( (unsigned)*va_head < (unsigned)packet_ptr ) || /* Did we wrap? */
+ ( *num_processed >= max_num_packets ) || /* Got enough packets? */
+ ( *num_processed_in_fifo > max_num_packets_in_fifo ) ) /* Got enough */
+ /* packets for this fifo? */
+      {
+ break;
+      }
+
+  } /* End: Keep looping through the fifo. */
+
+  return(rc);
+
+} /* End: DMA_RecFifoPollProcessWrap() */
+
+
+/*!
+ * \brief Simple Poll Normal Reception Fifos
+ *
+ * Poll the "normal" reception fifos in the specified fifo group, removing one
+ * packet after another from the fifos, dispatching the appropriate receive
+ * function for each packet, until one of the following occurs:
+ * 1. All packets in all of the fifos have been received.
+ * 2. A receive function returns a non-zero value.
+ * 3. The last packet removed from a fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ * 4. There have been fruitfulPollLimit polls attempted (summed across all
+ * fifos).
+ *
+ * Polling occurs in a round-robin fashion through the array of normal fifos in
+ * the group. If a fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing all of the packets in a fifo (or emptying that fifo),
+ * the next fifo in the group is processed. When the last index in the fifo
+ * array is processed, processing continues with the first fifo in the array.
+ * Multiple loops through the array of fifos in the group may occur until all
+ * fifos are empty or fruitfulPollLimit polls have been completed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed. This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload. The packet header is always be 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always return a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] fg_ptr Pointer to the fifo group.
+ * \param[in] fruitfulPollLimit The limit on the number of fruitful polls that
+ * will be attempted (summed across all fifos).
+ * If the limit is reached, this function
+ * returns. A value of zero means there is no
+ * limit imposed. A fruitful poll is one where
+ * at least one packet has arrived in the fifo
+ * since the last poll.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ *
+ */
+int DMA_RecFifoSimplePollNormalFifos( DMA_RecFifoGroup_t *fg_ptr,
+ int fruitfulPollLimit)
+{
+  int rc = 0; /* Return code from recv_func. */
+  int num_processed = 0; /* Number of packets processed */
+  int num_processed_in_fifo = 0; /* Not used, but needed for calling*/
+ /* wrap function. */
+  int fruitfulPollCount; /* Number of fruitful polls. */
+
+  /*
+   *The following is actually a cache of the last receive function called.
+   * We cache it so we don't need to keep looking up the receive function
+   * info on each packet.
+   */
+  DMA_RecFifoRecvFunction_t recv_func_ptr=NULL; /* Pointer to receive function*/
+  void *recv_func_parm=NULL;;/* Receive function parameter */
+  /* NOTE(review): stray second ';' on the line above — a harmless empty
+   * statement, but it should be removed. */
+  int recv_func_id=-1; /* Function ID from the packet */
+ /* header. Init to -1 means */
+ /* recv_func_ptr and */
+ /* recv_func_parm do not cache */
+ /* the previous packet values. */
+
+  DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */
+  unsigned int packet_bytes; /* Number of bytes in the packet. */
+  unsigned int wrap; /* 1: A wrap of the fifo is going */
+ /* to occur. */
+ /* 0: No wrap is going to occur. */
+
+  /*
+   * Processing of packets occurs in the fifo in three phases:
+   * Normal Phase 1 : Packets before the wrap.
+   * Handle Wrap Phase: Packets during the wrap.
+   * Normal Phase 2 : Packets after the wrap.
+   */
+  void *va_logical_tail; /* The point beyond which normal */
+ /* processing of packets ends. */
+  void *va_starting_head; /* Pointer to the first packet in */
+ /* a contiguous group extracted */
+ /* from the fifo. */
+  void *va_nextHead; /* Pointer to the next packet to */
+ /* be processed. */
+  void *va_tail; /* Snapshot of the fifo's tail. */
+  unsigned int num_packets_processed_since_moving_fifo_head; /*
+ Tells us when we should move the
+ hardware head. */
+
+  /*
+   * Control variables for looping through the fifos
+   */
+  int fifo_index=0; /* Index of fifo being processed. */
+ /* Start with first fifo. */
+  unsigned int fifo_bit_number; /* The bit number of the fifo */
+ /* being processed. Group0: 0-7, */
+ /* Group1: 8-15, Group2: 16-23, */
+ /* Group3: 24-31. Corresponds to */
+ /* the DMA not-empty status bits. */
+  int num_fifos_in_group; /* Number of fifos in this group. */
+  int num_packets_in_fifo; /* Count of packets processed in a */
+ /* fifo. */
+  unsigned int not_empty_status=0; /* Snapshot of the not empty status*/
+ /* for this group. 0 indicates */
+ /* that no snapshot has occurred */
+ /* yet. */
+  DMA_RecFifo_t *rec_fifo_ptr; /* Pointer to reception fifo being */
+ /* processed. */
+
+
+  SPI_assert( fg_ptr != NULL );
+
+  num_fifos_in_group = fg_ptr->num_normal_fifos;
+
+  /*
+   * Start the fruitful poll count at the max.
+   * For unlimited, set to a very high value.
+   */
+  fruitfulPollCount = (fruitfulPollLimit == 0) ? 0x7FFFFFFF : fruitfulPollLimit;
+
+  /*
+   * Circularly loop through the not-empty fifos in the fifo group.
+   * Keep going until one of the termination conditions documented in the
+   * prolog occurs.
+   *
+   */
+  for (;;) {
+    /*
+     * Find the next fifo to process.
+     */
+    rec_fifo_ptr = &fg_ptr->fifos[fifo_index]; /* This is the fifo itself*/
+    fifo_bit_number = _BN(rec_fifo_ptr->global_fifo_id);/* fifo's status bit*/
+
+    fifo_index = DMA_RecFifoGetNextFifo(fg_ptr,
+ fifo_index,
+ &fifo_bit_number,
+ 0, /* num_empty_passes */
+ 0, /* not_empty_poll_delay */
+ &not_empty_status);
+    if (fifo_index < 0) { /* No more packets to process? */
+#if defined(BGP_DD1_WORKAROUNDS)
+      /*
+       *
+       * If there are no more non-empty fifos, count the number of consecutive
+       * times the poll function came up dry (num_processed == 0), and if it
+       * exceeds a threshold, issue a system call to clear the rDMA's "full
+       * reception fifo" condition so it begins to receive packets again.
+       *
+       * When a non-empty fifo is returned, its shadow va_tail pointer has been
+       * updated to reflect the amount of packet data in the fifo.
+       */
+      if (num_processed > 0) { /* Did we process at least 1 packet? */
+ NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */
+ /* likely not encountered a fifo full */
+ /* condition and stopped. Reset the */
+ /* fifo counter so we will start */
+ /* tracking empty calls to poll. */
+      }
+      else {
+ if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+ (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+ /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+ rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+ /* reception fifos filled and the */
+ /* DMA has stopped. */
+ /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+ NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */
+ /* fill-fifo counter. */
+ }
+      }
+#endif
+      /* printf("Poll: returned %d processed\n",num_processed); */
+      return (num_processed);
+    }
+
+    num_packets_in_fifo = 0;
+
+    /*
+     * Establish pointers to the reception fifo and the DMA fifo.
+     * Snapshot the hardware head and tail pointers...they may change while we
+     * are running. We will snapshot the tail again after processing everything
+     * up to this snapshot, until the fifo is empty (head == tail).
+     */
+    rec_fifo_ptr = &(fg_ptr->fifos[fifo_index]);
+    DMA_Fifo_t *fifo_ptr = &(rec_fifo_ptr->dma_fifo);
+    void *va_head = fifo_ptr->va_head;
+    va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */
+ /* tail. */
+    num_packets_processed_since_moving_fifo_head =
+      rec_fifo_ptr->num_packets_processed_since_moving_fifo_head; /* Fetch */
+ /* for later use. */
+
+#if defined(CONFIG_BGP_STATISTICS)
+    {
+ unsigned int used_space = (fifo_ptr->va_tail >= fifo_ptr->va_head)
+ ? ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 )
+ : (fifo_ptr->fifo_size + ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 ) )
+ ;
+ reception_fifo_histogram[fls(used_space)] += 1 ;
+    }
+#endif
+    /*
+     * Loop processing packets until the fifo is empty or until the fruitful poll
+     * limit is reached.
+     * At the top of the loop, we have a new snapshot of the tail, so something
+     * may have appeared in the fifo.
+     */
+    while ( ( rc == 0 ) &&
+ ( va_tail != va_head ) &&
+ ( fruitfulPollCount > 0) ) { /* Is there something in this fifo? */
+ /* Yes... */
+      fruitfulPollCount--; /* Count the polls */
+
+      /*
+       * MSYNC before we look at the data in the fifo to ensure that snoops
+       * issued by the DMA have completed. This ensures the L1 cache
+       * invalidations have completed so we don't look at stale data.
+       */
+      _bgp_msync();
+
+      /*
+       * Touch the first packet right away so it is is loaded into the memory
+       * cache before we try to use it.
+       */
+      _bgp_dcache_touch_line( va_head );
+
+      /*
+       * Prepare to split up the processing between "normal" and "handleWrap".
+       * Establish a "logicalTail" which is the point beyond which "normal"
+       * processing changes to "handleWrap" processing.
+       */
+      if ( va_head < va_tail ) { /* No wrap will occur? */
+ wrap = 0;
+ va_logical_tail = va_tail; /* Logical tail is the physical tail */
+      }
+      else { /* Wrap will occur. Logical tail is 256 bytes before the end
+ * of the fifo. We need to stop normal phase 1 there because
+ * that is the first point at which the next packet could wrap.
+ */
+ wrap = 1;
+ va_logical_tail = (void*)( ((unsigned)fifo_ptr->va_end) - 256 );
+      }
+
+      /* Loop processing packets until we hit our tail snapshot */
+      while ( ( rc == 0 ) &&
+ ( va_head != va_tail ) ) {
+ /*
+ * Process packets that do not wrap. This is everything up to the
+ * logical tail. This gets executed both before and after wrapping.
+ * This is normal phase 1 and normal phase 2.
+ */
+ va_starting_head = va_head;
+ /* NOTE(review): va_starting_head is written here but never read in this
+  * function — dead store; confirm it is not needed and remove. */
+
+ while ( ( rc == 0 ) &&
+ ( va_head < va_logical_tail ) ) {
+
+ packet_ptr = (DMA_PacketHeader_t*)va_head;
+ packet_bytes = (packet_ptr->Chunks + 1) << 5;
+
+ /*
+ * Touch the NEXT packet to ensure it will be in L1 cache when we
+ * are ready for it on the next iteration. Even though the packet will
+ * likely be touched in its entirety by the receive function, and that
+ * will likely cause the processor to perform prefetching of the next
+ * packet, bringing in the next packet now has been shown to improve
+ * bandwidth from 1.41 bytes/cycle to 1.44 bytes/cycle, so we put
+ * this dcbt here.
+ */
+ va_nextHead = (void*) ( (unsigned)va_head + packet_bytes );
+
+ if ( va_nextHead < va_logical_tail )
+ _bgp_dcache_touch_line( va_nextHead );
+
+ /*
+ * Determine the receive function to call.
+ * The packet header Func_Id contains the ID of the function to call.
+ * We cache the previous packet's values because it is likely this
+ * packet will be the same. If not, call out of line function to
+ * re-prime the cache.
+ */
+ if ( packet_ptr->Func_Id != recv_func_id ) {
+ recv_func_id = packet_ptr->Func_Id;
+ DMA_RecFifoPollPrimeRecvFuncCache( recv_func_id,
+ &recv_func_ptr,
+ &recv_func_parm );
+ }
+
+ /* Call the receive function, and no matter what happens, increment
+ * the number of packets processed and move our head snapshot to the
+ * next packet.
+ */
+ if( recv_func_ptr)
+ {
+ rc = (*recv_func_ptr)( rec_fifo_ptr,
+ packet_ptr,
+ recv_func_parm,
+ (char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),
+ packet_bytes - sizeof(DMA_PacketHeader_t) );
+ }
+ else
+ {
+ /* Neither a registered function nor an error handler: log and skip. */
+ printk(KERN_ERR "DMA_RecFifoSimplePollNormalFifos recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+ recv_func_id,rec_fifo_ptr,packet_ptr,recv_func_parm,(char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),packet_bytes - sizeof(DMA_PacketHeader_t)) ;
+
+ }
+ num_packets_processed_since_moving_fifo_head++;
+ num_packets_in_fifo++;
+
+#ifdef DEBUG_PRINT
+    printf("SimplePollById: num_processed=%d, va_head=0x%08x, va_tail=0x%08x, va_logical_tail=0x%08x, va_end=0x%08x, willWrap=%d\n",num_processed,(unsigned)va_head,(unsigned)va_tail,(unsigned)va_logical_tail,(unsigned)fifo_ptr->va_end,wrap);
+#endif
+
+ va_head = va_nextHead;
+
+ } /* End: Process packets that do not wrap */
+
+ /*
+ * We are done processing all packets prior to the wrap.
+ * If the shadow va_head is not in sync with the hardware head, or if
+ * we are going to wrap, sync up the hardware head and recalculate the
+ * free space. The movement of the head causes the fifo's free space
+ * to be recalculated.
+ *
+ * The wrap function requires that the shadow and hardware heads be in
+ * sync. If we are not wrapping, we condition the syncing of the heads
+ * on whether we have exceeded our limit on the number of packets we
+ * processed in a fifo since the last time we moved the
+ * hardware head. If we have only processed a few packets, we just
+ * leave the hardware head where it is and don't incur the expense of
+ * moving the hardware head. If we have processed at least our limit
+ * of packets, then it is good to move the hardware head.
+ */
+ if ( ( num_packets_processed_since_moving_fifo_head >
+ DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD ) ||
+ ( wrap ) ) {
+
+ DMA_FifoSetHead( fifo_ptr, va_head );
+
+ num_packets_processed_since_moving_fifo_head = 0;
+ }
+
+ /*
+ * If we are anticipating a wrap, go handle the wrap.
+ */
+ if ( ( rc == 0 ) && wrap ) {
+ /*
+ * Handle the wrapping of the fifo. This requires extra checking
+ * and moving of the head, and thus is in its own function.
+ * It is a generic function, used by other poll functions. Some of
+ * these other poll functions have the ability to quit processing
+ * packets when a specified limit is reached overall, or per fifo.
+ * That is what the last two parameters specify. For this poll
+ * function, we don't have any limit...we process packets until the
+ * fifo is empty, so we pass in large unreachable limits.
+ */
+ rc = DMA_RecFifoPollProcessWrap (
+ rec_fifo_ptr,
+ &va_head,
+ va_tail,
+ &num_processed,
+ &num_processed_in_fifo,
+ 0x7FFFFFFF, /* Infinite packet limit, overall */
+ 0x7FFFFFFF);/* Infinite packet limit per fifo */
+
+ va_logical_tail = va_tail; /* Set to actual tail now. */
+ wrap = 0; /* Next time around, don't do wrap processing. */
+ }
+
+      } /* End: Process packets until we hit our snapshotted tail */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+      NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */
+ /* likely not encountered a fifo full */
+ /* condition and stopped. Reset the */
+ /* fifo counter so we will start */
+ /* tracking empty calls to poll. */
+#endif
+
+      va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */
+ /* tail again. */
+
+    } /* End: Loop while there is something in the fifo */
+
+    /*
+     * The fifo is now empty. If we have processed at least one packet,
+     * return the number, or if the receive function returned an error,
+     * return that return code.
+     */
+    if ( num_packets_in_fifo > 0 ) {
+      /* Store in the fifo structure the number of packets processed since
+       * last moving the hardware head, and the current head */
+      rec_fifo_ptr->num_packets_processed_since_moving_fifo_head =
+ num_packets_processed_since_moving_fifo_head;
+      fifo_ptr->va_head = va_head;
+      num_processed += num_packets_in_fifo;
+      /* Clear the threshold crossed condition, in case we have gone below
+       * the threshold.
+       */
+      DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+ _BN(rec_fifo_ptr->global_fifo_id),
+ 0 );
+
+      /* If the receive function returned an error, exit with that error now */
+      if ( rc ) return (rc);
+    }
+    /*
+     * We exited the loop processing the fifo_index fifo.
+     * - If we exited because the fifo was empty according to our snapshot
+     * of the fifo's tail (head == tail snapshot), we want to turn off this
+     * fifo's not-empty status in our shadow copy of the status so we
+     * process all of the other fifos before re-fetching the true status and
+     * tail for this fifo, giving this fifo another chance.
+     */
+    not_empty_status &= ~(fifo_bit_number);
+
+#ifdef DEBUG_PRINT
+    printf("PollNormal: Turning off status bit 0x%08x, status=0x%08x\n",fifo_bit_number,not_empty_status);
+#endif
+
+    /* Bump to next fifo */
+    fifo_index = (fifo_index+1) % num_fifos_in_group;
+
+  } /* End: for loop processing reception fifos */
+
+} /* End: DMA_RecFifoSimplePollNormalFifos() */
+
+
+
+
+/*!
+ * \brief Simple Poll Normal Reception Fifo Given a Fifo Group and Fifo ID
+ *
+ * Poll the specified "normal" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1. All packets in the fifo have been received.
+ * 2. The specified fifo is empty.
+ * 3. A receive function returns a non-zero value.
+ * 4. The last packet removed from the fifo has an invalid registration id. The
+ * error receive function will have been called, but polling ends.
+ * The invalid packet is counted as a processed packet, and the return
+ * code from the error receive function is returned.
+ * 5. There have been fruitfulPollLimit polls attempted.
+ *
+ * If the specified fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing all of the packets in the fifo (emptying that fifo),
+ * or the fruitfulPollLimit has been reached, the function returns the number
+ * of packets processed.
+ *
+ * It is risky to set the fruitfulPollLimit to zero, allowing this function to
+ * poll indefinitely as long as there are packets to be processed. This may
+ * starve the node in a scenario where other nodes send "polling" packets to
+ * our node, and our node never gets a chance to do anything else except
+ * process those polling packets.
+ *
+ * The receive functions must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header, pointer to the payload, and
+ * length of the payload.  The packet header is always 16 bytes of
+ * contiguous storage, in the fifo. Because the fifo is a circular buffer,
+ * the payload of a packet may wrap from the end of the fifo to the beginning.
+ * For large fifos, this happens infrequently. To make it easier for
+ * user/messaging code, the poll function will always pass a starting payload
+ * address and number of bytes so that the receive function can treat the packet
+ * as contiguous storage in memory. If the packet does not wrap, the starting
+ * payload address will be a pointer to the appropriate address in the fifo.
+ * If the packet does wrap, the poll function will copy bytes from the fifo to
+ * a contiguous buffer (on the stack) and call the receive function with a
+ * payload pointer pointing to this temporary buffer. In either case, when the
+ * receive function returns, user code cannot assume that the payload buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function has
+ * to copy it to some other location. The packet header and payload are
+ * 16-byte aligned for optimized copying.
+ *
+ * \param[in] fifo_id The ID of the fifo to be polled.
+ * (0 through
+ * DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP-1).
+ * \param[in] fg_ptr Pointer to the fifo group.
+ * \param[in] fruitfulPollLimit The limit on the number of fruitful polls that
+ * will be attempted.
+ * If the limit is reached, this function
+ * returns. A value of zero means there is no
+ * limit imposed. A fruitful poll is one where
+ * at least one packet has arrived in the fifo
+ * since the last poll.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ *
+ */
+int DMA_RecFifoSimplePollNormalFifoById( int fifo_id,
+ DMA_RecFifoGroup_t *fg_ptr,
+ int fruitfulPollLimit
+ )
+{
+ int rc = 0; /* Return code from recv_func. */
+ int num_processed = 0; /* Number of packets processed */
+ int num_processed_in_fifo = 0; /* Not used, but needed for calling*/
+ /* wrap function. */
+ int fruitfulPollCount; /* Number of fruitful polls. */
+
+ /*
+ * The following is actually a cache of the last receive function called.
+ * We cache it so we don't need to keep looking up the receive function
+ * info on each packet.
+ */
+ DMA_RecFifoRecvFunction_t recv_func_ptr=NULL; /* Pointer to receive function*/
+ void *recv_func_parm=NULL;/* Receive function parameter */
+ int recv_func_id=-1; /* Function ID from the packet */
+ /* header. Init to -1 means */
+ /* recv_func_ptr and */
+ /* recv_func_parm do not cache */
+ /* the previous packet values. */
+
+ DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */
+ unsigned int packet_bytes; /* Number of bytes in the packet. */
+ unsigned int wrap; /* 1: A wrap of the fifo is going */
+ /* to occur. */
+ /* 0: No wrap is going to occur. */
+
+ /*
+ * Processing of packets occurs in the fifo in three phases:
+ * Normal Phase 1 : Packets before the wrap.
+ * Handle Wrap Phase: Packets during the wrap.
+ * Normal Phase 2 : Packets after the wrap.
+ */
+ void *va_logical_tail; /* The point beyond which normal */
+ /* processing of packets ends. */
+ void *va_starting_head; /* Pointer to the first packet in */
+ /* a contiguous group extracted */
+ /* from the fifo. */
+ void *va_nextHead; /* Pointer to the next packet to */
+ /* be processed. */
+ void *va_tail; /* Snapshot of the fifo's tail. */
+ unsigned int num_packets_processed_since_moving_fifo_head; /*
+ Tells us when we should move the
+ hardware head. */
+
+ SPI_assert( fg_ptr != NULL );
+ SPI_assert( ( fifo_id >= 0 ) &&
+ ( fifo_id < DMA_NUM_NORMAL_REC_FIFOS_PER_GROUP ) );
+ /*
+ * Start the fruitful poll count at the max.
+ * For unlimited, set to a very high value.
+ */
+ fruitfulPollCount = (fruitfulPollLimit == 0) ? 0x7FFFFFFF : fruitfulPollLimit;
+
+ /*
+ * Establish pointers to the reception fifo and the DMA fifo.
+ * Snapshot the hardware head and tail pointers...they may change while we
+ * are running. We will snapshot the tail again after processing everything
+ * up to this snapshot, until the fifo is empty (head == tail).
+ */
+ DMA_RecFifo_t *rec_fifo_ptr = &(fg_ptr->fifos[fifo_id]);
+ DMA_Fifo_t *fifo_ptr = &(rec_fifo_ptr->dma_fifo);
+ void *va_head = fifo_ptr->va_head;
+ va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */
+ /* tail. */
+ num_packets_processed_since_moving_fifo_head =
+ rec_fifo_ptr->num_packets_processed_since_moving_fifo_head; /* Fetch */
+ /* for later use. */
+
+#if defined(CONFIG_BGP_STATISTICS)
+ {
+ /* Histogram the fifo occupancy (in 16-byte quads) at poll entry. */
+ unsigned int used_space = (fifo_ptr->va_tail >= fifo_ptr->va_head)
+ ? ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 )
+ : (fifo_ptr->fifo_size + ( ((unsigned)(fifo_ptr->va_tail) - (unsigned)(fifo_ptr->va_head)) >> 4 ) )
+ ;
+ reception_fifo_histogram[fls(used_space)] += 1 ;
+ }
+#endif
+ /*
+ * Loop processing packets until the fifo is empty or the fruitfulPollLimit
+ * has been reached.
+ * At the top of the loop, we have a new snapshot of the tail, so something
+ * may have appeared in the fifo.
+ */
+ while ( ( rc == 0 ) &&
+ ( va_tail != va_head ) &&
+ ( fruitfulPollCount > 0 ) ) { /* Is there something in this fifo? */
+ /* Yes... */
+ fruitfulPollCount--; /* Count the polls */
+
+ /*
+ * MSYNC before we look at the data in the fifo to ensure that snoops
+ * issued by the DMA have completed. This ensures the L1 cache
+ * invalidations have completed so we don't look at stale data.
+ */
+ _bgp_msync();
+
+ /*
+ * Touch the first packet right away so it is loaded into the memory
+ * cache before we try to use it.
+ */
+ _bgp_dcache_touch_line( va_head );
+
+ /*
+ * Prepare to split up the processing between "normal" and "handleWrap".
+ * Establish a "logicalTail" which is the point beyond which "normal"
+ * processing changes to "handleWrap" processing.
+ */
+ if ( va_head < va_tail ) { /* No wrap will occur? */
+ wrap = 0;
+ va_logical_tail = va_tail; /* Logical tail is the physical tail */
+ }
+ else { /* Wrap will occur. Logical tail is 256 bytes before the end
+ * of the fifo. We need to stop normal phase 1 there because
+ * that is the first point at which the next packet could wrap.
+ */
+ wrap = 1;
+ va_logical_tail = (void*)( ((unsigned)fifo_ptr->va_end) - 256 );
+ }
+
+ /* Loop processing packets until we hit our tail snapshot */
+ while ( ( rc == 0 ) &&
+ ( va_head != va_tail ) ) {
+ /*
+ * Process packets that do not wrap. This is everything up to the
+ * logical tail. This gets executed both before and after wrapping.
+ * This is normal phase 1 and normal phase 2.
+ */
+ va_starting_head = va_head;
+
+ while ( ( rc == 0 ) &&
+ ( va_head < va_logical_tail ) ) {
+
+ packet_ptr = (DMA_PacketHeader_t*)va_head;
+ packet_bytes = (packet_ptr->Chunks + 1) << 5;
+
+ /*
+ * Touch the NEXT packet to ensure it will be in L1 cache when we
+ * are ready for it on the next iteration. Even though the packet will
+ * likely be touched in its entirety by the receive function, and that
+ * will likely cause the processor to perform prefetching of the next
+ * packet, bringing in the next packet now has been shown to improve
+ * bandwidth from 1.41 bytes/cycle to 1.44 bytes/cycle, so we put
+ * this dcbt here.
+ */
+ va_nextHead = (void*) ( (unsigned)va_head + packet_bytes );
+
+ if ( va_nextHead < va_logical_tail )
+ _bgp_dcache_touch_line( va_nextHead );
+
+ /*
+ * Determine the receive function to call.
+ * The packet header Func_Id contains the ID of the function to call.
+ * We cache the previous packet's values because it is likely this
+ * packet will be the same. If not, call out of line function to
+ * re-prime the cache.
+ */
+ if ( packet_ptr->Func_Id != recv_func_id ) {
+ recv_func_id = packet_ptr->Func_Id;
+ DMA_RecFifoPollPrimeRecvFuncCache( recv_func_id,
+ &recv_func_ptr,
+ &recv_func_parm );
+ }
+
+ /* Call the receive function, and no matter what happens, increment
+ * the number of packets processed and move our head snapshot to the
+ * next packet.
+ */
+ SPI_assert ( recv_func_ptr != NULL );
+
+ /* Defensive runtime check in case SPI_assert is compiled out:
+ * log and skip the packet rather than call through NULL. */
+ if( recv_func_ptr)
+ {
+ rc = (*recv_func_ptr)( rec_fifo_ptr,
+ packet_ptr,
+ recv_func_parm,
+ (char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),
+ packet_bytes - sizeof(DMA_PacketHeader_t) );
+ }
+ else
+ {
+ printk(KERN_ERR "DMA_RecFifoSimplePollNormalFifoById recv_func_ptr was NULL recv_func_id=%02x rec_fifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+ recv_func_id,rec_fifo_ptr,packet_ptr,recv_func_parm,(char*)((unsigned)packet_ptr + sizeof(DMA_PacketHeader_t)),packet_bytes - sizeof(DMA_PacketHeader_t)) ;
+
+ }
+ num_processed++;
+ num_packets_processed_since_moving_fifo_head++;
+
+#ifdef DEBUG_PRINT
+ printf("SimplePollById: num_processed=%d, va_head=0x%08x, va_tail=0x%08x, va_logical_tail=0x%08x, va_end=0x%08x, willWrap=%d\n",num_processed,(unsigned)va_head,(unsigned)va_tail,(unsigned)va_logical_tail,(unsigned)fifo_ptr->va_end,wrap);
+#endif
+
+ va_head = va_nextHead;
+
+ } /* End: Process packets that do not wrap */
+
+ /*
+ * We are done processing all packets prior to the wrap.
+ * If the shadow va_head is not in sync with the hardware head, or if
+ * we are going to wrap, sync up the hardware head and recalculate the
+ * free space. The movement of the head causes the fifo's free space
+ * to be recalculated.
+ *
+ * The wrap function requires that the shadow and hardware heads be in
+ * sync. If we are not wrapping, we condition the syncing of the heads
+ * on whether we have exceeded our limit on the number of packets we
+ * processed in a fifo since the last time we moved the
+ * hardware head. If we have only processed a few packets, we just
+ * leave the hardware head where it is and don't incur the expense of
+ * moving the hardware head. If we have processed at least our limit
+ * of packets, then it is good to move the hardware head.
+ */
+ if ( ( num_packets_processed_since_moving_fifo_head >
+ DMA_MAX_NUM_PACKETS_BEFORE_MOVING_HEAD ) ||
+ ( wrap ) ) {
+
+ DMA_FifoSetHead( fifo_ptr, va_head );
+
+ num_packets_processed_since_moving_fifo_head = 0;
+ }
+
+ /*
+ * If we are anticipating a wrap, go handle the wrap.
+ */
+ if ( ( rc == 0 ) && wrap ) {
+ /*
+ * Handle the wrapping of the fifo. This requires extra checking
+ * and moving of the head, and thus is in its own function.
+ * It is a generic function, used by other poll functions. Some of
+ * these other poll functions have the ability to quit processing
+ * packets when a specified limit is reached overall, or per fifo.
+ * That is what the last two parameters specify. For this poll
+ * function, we don't have any limit...we process packets until the
+ * fifo is empty, so we pass in large unreachable limits.
+ */
+ rc = DMA_RecFifoPollProcessWrap (
+ rec_fifo_ptr,
+ &va_head,
+ va_tail,
+ &num_processed,
+ &num_processed_in_fifo,
+ 0x7FFFFFFF, /* Infinite packet limit, overall */
+ 0x7FFFFFFF);/* Infinite packet limit per fifo */
+
+ va_logical_tail = va_tail; /* Set to actual tail now. */
+ wrap = 0; /* Next time around, don't do wrap processing. */
+ }
+
+ } /* End: Process packets until we hit our snapshotted tail */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+ NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */
+ /* likely not encountered a fifo full */
+ /* condition and stopped. Reset the */
+ /* fifo counter so we will start */
+ /* tracking empty calls to poll. */
+#endif
+
+ va_tail = DMA_FifoGetTailNoFreeSpaceUpdate( fifo_ptr ); /* Snapshot HW */
+ /* tail again. */
+
+ } /* End: Loop while there is something in the fifo */
+
+ /*
+ * The fifo is now empty. If we have processed at least one packet,
+ * return the number, or if the receive function returned an error,
+ * return that return code.
+ * Also, clear the reception fifo threshold crossed interrupt condition.
+ */
+ if ( num_processed > 0 ) {
+ /* Store in the fifo structure the number of packets processed since
+ * last moving the hardware head, and the current head */
+ rec_fifo_ptr->num_packets_processed_since_moving_fifo_head =
+ num_packets_processed_since_moving_fifo_head;
+ fifo_ptr->va_head = va_head;
+ DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+ _BN(rec_fifo_ptr->global_fifo_id),
+ 0 );
+
+ if ( rc == 0 ) return (num_processed);
+ else return (rc);
+ }
+
+ /*
+ * We didn't process any packets. This could be because the rDMA has
+ * shut-down (a DD1 hardware behavior) because the reception fifo became full.
+ * We count the number of times we consecutively come up empty, and reactivate
+ * the rDMA via a system call.
+ */
+ else {
+
+#if defined(BGP_DD1_WORKAROUNDS)
+ if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+ (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+ /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+ rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+ /* reception fifos filled and the */
+ /* DMA has stopped. */
+ /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+ NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */
+ /* fill-fifo counter. */
+ }
+#endif
+
+ return (0); /* Return no packets processed */
+ }
+
+} /* End: DMA_RecFifoSimplePollNormalFifoById() */
+
+
+/*!
+ * \brief Poll Header Reception Fifo Given a Fifo Group
+ *
+ * Poll the "header" reception fifo in the specified fifo group,
+ * removing one packet after another from the fifo, dispatching the appropriate
+ * receive function for each packet, until one of the following occurs:
+ * 1. Total_packets packets are received
+ * 2. The specified fifo is empty
+ * 3. A receive function returns a non-zero value
+ *
+ * If the header fifo has a packet, the appropriate receive function is
+ * called. Upon return, the packet is removed from the fifo (the fifo head is
+ * moved past the packet).
+ *
+ * After processing num_packets packets in the fifo (or emptying that fifo),
+ * the function returns the number of packets processed.
+ *
+ * The receive function must be registered through the
+ * DMA_RecFifoRegisterRecvFunction interface. The receive function is
+ * called with a pointer to the packet header. The packet header is always
+ * 16 bytes of contiguous storage, in the fifo. When the
+ * receive function returns, user code cannot assume that the buffer is
+ * permanent, i.e., after return, it may be overwritten by either the DMA or
+ * the poll function. To keep a copy of the packet, the receive function would
+ * have to copy it to some other location. The packet header is 16-byte aligned
+ * for optimized copying.
+ *
+ * \param[in] num_packets The maximum number of packets that will be
+ * processed.
+ * \param[in] num_empty_passes When the not-empty status indicates that all
+ * fifos in the group are empty, this is the
+ * number of times the not-empty status is
+ * re-fetched and re-checked before officially
+ * declaring that they are indeed empty.
+ * (0 means no extra passes are made).
+ * \param[in] not_empty_poll_delay The number of pclks to delay between polls
+ * of the not-empty status when the fifos are
+ * empty.
+ * \param[in] fg_ptr Pointer to the fifo group.
+ *
+ * \retval num_packets_received The number of packets received and processed.
+ * \retval negative_value The return code from the receive function that
+ * caused polling to end.
+ *
+ * \pre The caller is responsible for disabling interrupts before invoking this
+ * function.
+ *
+ */
+int DMA_RecFifoPollHeaderFifo( int num_packets,
+ int num_empty_passes,
+ int not_empty_poll_delay,
+ DMA_RecFifoGroup_t *fg_ptr
+ )
+{
+ int fifo_index; /* Index of fifo being processed */
+ int num_packets_in_fifo; /* Count of packets processed in a */
+ /* fifo. */
+ unsigned int status; /* Snapshot of the not empty status*/
+ /* for this group. */
+ int rc = 0; /* Return code from recv_func. */
+ int num_processed = 0; /* Number of packets processed */
+ DMA_PacketIovec_t io_vec; /* Payload I/O vector */
+ DMA_RecFifoRecvFunction_t recv_func_ptr; /* Pointer to receive function */
+ void *recv_func_parm;/* Receive function parameter */
+ DMA_RecFifo_t *fifo_ptr; /* Pointer to fifo being processed */
+ DMA_PacketHeader_t *packet_ptr; /* Pointer to packet header */
+ int passes; /* Counter of not-empty passes */
+
+ SPI_assert( num_packets > 0 );
+ SPI_assert( num_empty_passes >= 0 );
+ SPI_assert( fg_ptr != NULL );
+
+
+ fifo_index = DMA_HEADER_REC_FIFO_ID; /* We are working with the header */
+ /* fifo. */
+ fifo_ptr = &(fg_ptr->fifos[fifo_index]);
+
+ /*
+ * Loop until the header fifo is declared empty, or
+ * until one of the termination conditions documented in the prolog occurs.
+ *
+ */
+ for (;;)
+ {
+ /*
+ * If the DMA SRAM not-empty status for this fifo is zero (the fifo is
+ * empty), the status is checked num_empty_passes times with a slight
+ * delay in between to give the DMA time to make progress before declaring
+ * that the fifo is truly empty.
+ */
+ passes = num_empty_passes;
+ status = DMA_RecFifoGetNotEmptyById( fg_ptr,
+ fifo_index ); /* Get Header fifo */
+ /* not-empty status. */
+ while ( ( status == 0 ) &&
+ ( passes-- > 0 ) ) /* Decrement the working counter, not the */
+ /* caller's parameter, so each outer */
+ /* iteration re-arms the full number of */
+ /* empty re-checks. */
+ {
+ /* Delay, allowing the DMA to update its status */
+ unsigned int pclks = not_empty_poll_delay;
+ while( pclks-- )
+ {
+ asm volatile("nop;");
+ }
+
+ /* Re-fetch the not-empty status */
+ status = DMA_RecFifoGetNotEmptyById(
+ fg_ptr,
+ fifo_index ); /* Get Header fifo */
+ /* not-empty status.*/
+ }
+
+ if ( status == 0 ) { /* Fifo is empty? */
+
+#if defined(BGP_DD1_WORKAROUNDS)
+ if (num_processed > 0) { /* Did we process at least 1 packet? */
+ NumEmptyPollFunctionCalls = 0; /* The DMA must be active. It has */
+ /* likely not encountered a fifo full */
+ /* condition and stopped. Reset the */
+ /* fifo counter so we will start */
+ /* tracking empty calls to poll. */
+ }
+ else {
+ if ( (NumEmptyPollFunctionCalls >= 0) && /* We are tracking empty calls? */
+ (++NumEmptyPollFunctionCalls >= NUM_EMPTY_POLL_FUNCTION_CALL_LIMIT) ) {
+ /* printf("Hit Empty Poll Limit...invoking syscall to clear full condition\n"); */
+ rc = Kernel_ClearFullReceptionFifo(); /* Activate rDMA in case the */
+ /* reception fifos filled and the */
+ /* DMA has stopped. */
+ /* printf("Returned from ClearFull syscall with rc=%d\n",rc); */
+ NumEmptyPollFunctionCalls = -1; /* The DMA is active. Reset the */
+ /* fill-fifo counter. */
+ }
+ }
+#endif
+
+ return (num_processed);
+ }
+
+ /* The fifo has something in it.
+ * Update its shadow va_tail pointer to reflect the amount of packet
+ * data in the fifo.
+ */
+ DMA_RecFifoGetTailById( fg_ptr,
+ fifo_index );
+
+ num_packets_in_fifo = 0;
+
+ /*
+ * MSYNC before we look at the data in the fifo to ensure that snoops
+ * issued by the DMA have completed. This ensures the L1 cache
+ * invalidations have completed so we don't look at stale data.
+ */
+ _bgp_msync();
+
+ /*
+ * Within a fifo: The area between the va_head and va_tail shadow pointers
+ * contains packets to be processed. Loop, processing those packets until
+ * we have processed packets_per_fifo of them, or all of them, or other
+ * issues come up.
+ *
+ */
+ while ( ( num_packets_in_fifo < num_packets ) &&
+ ( fifo_ptr->dma_fifo.va_head != fifo_ptr->dma_fifo.va_tail ) )
+ {
+ DMA_RecFifoGetAddresses( fifo_ptr,
+ &io_vec ); /* Get the payload pointer(s) */
+ /* for the packet at the head */
+ /* of the fifo. */
+
+ packet_ptr = (DMA_PacketHeader_t*)
+ fifo_ptr->dma_fifo.va_head; /* Point to packet header*/
+
+ /* Determine the receive function to call */
+ recv_func_ptr = DMA_RecFifoInfo.headerRecvFunction;
+ if ( recv_func_ptr != NULL )
+ {
+ recv_func_parm = DMA_RecFifoInfo.headerRecvFunctionParm;
+ }
+ else
+ {
+ recv_func_ptr = DMA_RecFifoInfo.errorRecvFunction;
+ recv_func_parm = DMA_RecFifoInfo.errorRecvFunctionParm;
+ }
+
+ /* Call the receive function */
+ if( recv_func_ptr)
+ {
+ rc = (*recv_func_ptr)(fifo_ptr,
+ packet_ptr,
+ recv_func_parm,
+ NULL, /* No payload */
+ 0); /* No payload bytes */
+ }
+ else
+ {
+ printk(KERN_ERR "DMA_RecFifoPollHeaderFifo recv_func_ptr was NULL rfifo_ptr=%p packet_ptr=%p recv_func_parm=%p recv_func_payload=%p length=%d\n",
+ fifo_ptr,packet_ptr,recv_func_parm,NULL,0) ;
+
+ }
+
+ DMA_RecFifoIncrementHead(fifo_ptr,
+ 1);/* Increment head by 16 bytes */
+
+ num_processed++;
+
+ if ( rc != 0 ) /* Did receive function fail? */
+ {
+ /* Clear the threshold crossed condition, in case we have gone below
+ * the threshold.
+ */
+ DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+ _BN(fifo_ptr->global_fifo_id),
+ 0 );
+ return (rc); /* Yes...return that return code */
+ }
+
+ if ( num_processed >= num_packets ) /* Got what they wanted? */
+ {
+ /* Clear the threshold crossed condition, in case we have gone below
+ * the threshold.
+ */
+ DMA_RecFifoSetClearThresholdCrossed( fg_ptr,
+ _BN(fifo_ptr->global_fifo_id),
+ 0 );
+ return (num_processed); /* Yes...all done */
+ }
+
+ num_packets_in_fifo++;
+
+ } /* End: Process up to packets_per_fifo packets in this fifo */
+
+ } /* End: Keep looping through the fifo. */
+
+} /* End: DMA_RecFifoPollHeaderFifo() */
+
+/* Export the reception-fifo SPI entry points for use by other kernel modules. */
+EXPORT_SYMBOL(DMA_RecFifoRegisterRecvFunction) ;
+EXPORT_SYMBOL(DMA_RecFifoGetFifoGroup) ;
+EXPORT_SYMBOL(DMA_RecFifoPollNormalFifoById) ;
+#if defined(CONFIG_BGP_STATISTICS)
+EXPORT_SYMBOL(reception_fifo_histogram) ;
+EXPORT_SYMBOL(reception_hi_watermark) ;
+#endif
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index 9caf5b5ad1c05b..6585ac4b75ee3c 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -111,6 +111,8 @@ obj-$(CONFIG_PS3_FLASH) += ps3flash.o
obj-$(CONFIG_JS_RTC) += js-rtc.o
js-rtc-y = rtc.o
+obj-$(CONFIG_BGP) += bluegene_console.o bluegene_networks.o
+
# Files generated that shall be removed upon make clean
clean-files := consolemap_deftbl.c defkeymap.c
diff --git a/drivers/char/bluegene_console.c b/drivers/char/bluegene_console.c
new file mode 100644
index 00000000000000..8bbfe64bcca0bc
--- /dev/null
+++ b/drivers/char/bluegene_console.c
@@ -0,0 +1,805 @@
+/*
+ * Blue Gene Console over JTAG.
+ *
+ * (C) Copyright IBM Corp. 2003,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Todd Inglett <tinglett@vnet.ibm.com>
+ *
+ *
+ */
+
+#include <linux/unistd.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/major.h>
+#include <linux/kernel.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/kbd_kern.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/sysrq.h>
+#include <linux/syscalls.h>
+#include <linux/proc_fs.h>
+
+#include <asm/bluegene.h>
+#include <asm/bluegene_ras.h>
+
+static struct proc_dir_entry *proc_ras; /* /proc/ras */
+static struct proc_dir_entry *proc_ras_ascii; /* /proc/ras_ascii */
+
+
+/* ToDo: figure out what to do with bgprintf... */
+/* NOTE(review): bgprintf is an alias for udbg_printf; the #define precedes
+ * the udbg.h include that declares udbg_printf. */
+#define bgprintf udbg_printf
+#include <asm/udbg.h>
+
+
+/* Char device major/minor numbers — presumably for the console tty driver
+ * registered elsewhere in this file; confirm against the register call. */
+#define BLUEGENE_MAJOR 229
+#define BLUEGENE_MINOR 0
+
+
+/* Global console state. Console output and RAS events are staged in one
+ * circular buffer (outboxBuff) and drained to the JTAG mailbox outbox.
+ * Ring invariant (see the append/remove helpers below): head == tail means
+ * empty; one slot is kept free to distinguish full from empty. */
+typedef struct _BG_CONSOLE {
+ struct tty_struct* tty;
+ spinlock_t ttyLock; /* presumably serializes tty access — confirm */
+ struct tty_driver* ttyDriver;
+#define BG_OUTBOX_BUFF_SIZE 8192
+ unsigned char outboxBuff[BG_OUTBOX_BUFF_SIZE]; /* Circular staging buffer. */
+ spinlock_t outboxLock; /* Protects outboxBuff/head/tail. */
+#define BG_RAS_MAGIC_CHAR ((unsigned char) 0xff)
+#define BG_RAS_TYPE_BINARY ((unsigned char) 0x82)
+#define BG_RAS_TYPE_ASCII ((unsigned char) 0x88)
+#define BG_OUTBOX_MSG_SIZE 248
+ unsigned int outboxHead; /* Ring index of next byte to drain. */
+ unsigned int outboxTail; /* Ring index of next free slot. */
+ unsigned int outboxMsgAge; /* Polls a partial line has waited. */
+#define BG_OUTBOX_MAX_AGE 36
+ unsigned int outboxRetry; /* Consecutive failed send attempts. */
+#define BG_OUTBOX_MAX_RETRY 2
+ int outboxMsgSent; /* Nonzero: a send is awaiting pickup. */
+ struct task_struct* kmboxdTask;
+ /* Wait queue to wakeup kmboxd. For now it runs strictly on timeout (polling),
+ * but in the future an interrupt or other means could wake it.
+ */
+ wait_queue_head_t wait;
+} BG_CONSOLE;
+
+
+/* NOTE(review): SPIN_LOCK_UNLOCKED static initializer is the 2.6.29-era
+ * idiom; later kernels require __SPIN_LOCK_UNLOCKED(name). */
+static BG_CONSOLE bgc = {
+ .tty = NULL,
+ .ttyLock = SPIN_LOCK_UNLOCKED,
+ .ttyDriver = NULL,
+ .outboxLock = SPIN_LOCK_UNLOCKED,
+ .outboxHead = 0,
+ .outboxTail = 0,
+ .outboxMsgAge = 0,
+ .outboxRetry = 0,
+ .outboxMsgSent = 0,
+ .kmboxdTask = NULL,
+};
+
+
+/* Advance the ring indices with wraparound; the caller must be holding
+ * the outbox lock. Each argument is evaluated exactly once. */
+#define BG_OUTBOX_HEAD_INCREMENT(i) bgc.outboxHead = (bgc.outboxHead + (i)) % BG_OUTBOX_BUFF_SIZE
+#define BG_OUTBOX_TAIL_INCREMENT(i) bgc.outboxTail = (bgc.outboxTail + (i)) % BG_OUTBOX_BUFF_SIZE
+
+
+/* How many bytes of outbox buffer space are in use. The caller must be */
+/* holding the outbox lock. */
+/* Bytes currently queued in the outbox ring buffer. The caller must be */
+/* holding the outbox lock. */
+static inline int __bgOutboxBufferUsed(void)
+{
+ return (bgc.outboxHead <= bgc.outboxTail)
+ ? (int) (bgc.outboxTail - bgc.outboxHead)
+ : (int) (BG_OUTBOX_BUFF_SIZE - bgc.outboxHead + bgc.outboxTail);
+}
+
+
+/* How many bytes of buffer space are in use. */
+/* Bytes of outbox buffer space in use (takes the outbox lock). */
+static inline int bgOutboxBufferUsed(struct tty_struct* tty)
+{
+ unsigned long irqState;
+ int used;
+
+ spin_lock_irqsave(&bgc.outboxLock, irqState);
+ used = __bgOutboxBufferUsed();
+ spin_unlock_irqrestore(&bgc.outboxLock, irqState);
+
+ return used;
+}
+
+
+/* How many bytes of outbox buffer space are unused. The caller must be */
+/* holding the outbox lock. */
+/* Bytes of outbox buffer space not in use. The caller must be holding the */
+/* outbox lock. NOTE(review): when head == tail (empty) this reports */
+/* BG_OUTBOX_BUFF_SIZE, although the append path keeps one slot reserved to */
+/* distinguish full from empty — confirm callers tolerate the off-by-one. */
+static inline int __bgOutboxBufferFree(void)
+{
+ return (bgc.outboxHead > bgc.outboxTail)
+ ? (int) (bgc.outboxHead - bgc.outboxTail)
+ : (int) (BG_OUTBOX_BUFF_SIZE - bgc.outboxTail + bgc.outboxHead);
+}
+
+
+/* How many bytes of buffer space are free. */
+/* Bytes of outbox buffer space free (takes the outbox lock). */
+static inline int bgOutboxBufferFree(struct tty_struct* tty)
+{
+ unsigned long irqState;
+ int unused;
+
+ spin_lock_irqsave(&bgc.outboxLock, irqState);
+ unused = __bgOutboxBufferFree();
+ spin_unlock_irqrestore(&bgc.outboxLock, irqState);
+
+ return unused;
+}
+
+
+/* Append the specified data to the outbox buffer. */
+/* Append the specified data to the outbox buffer. A dataLen of zero means */
+/* 'data' is a NUL-terminated string. Stops early (dropping the remainder) */
+/* when the ring is full; one slot is kept free to distinguish full from */
+/* empty. Returns the number of bytes actually buffered. The caller must */
+/* be holding the outbox lock. (Fixed: the original stored the byte and */
+/* advanced the count before the space check, over-reporting by one when */
+/* the buffer filled.) */
+static inline int __bgOutboxBufferAppend(unsigned char* data,
+ unsigned int dataLen)
+{
+ int i = 0;
+
+ while ((!dataLen && data[i]) || i < dataLen) {
+ /* Full? The next tail position would collide with the head. */
+ if ((bgc.outboxTail + 1) % BG_OUTBOX_BUFF_SIZE == bgc.outboxHead)
+ break;
+ bgc.outboxBuff[bgc.outboxTail] = data[i++];
+ bgc.outboxTail = (bgc.outboxTail + 1) % BG_OUTBOX_BUFF_SIZE;
+ }
+
+ return i;
+}
+
+
+/* Remove the specified number of bytes from the outbox buffer. */
+/* Copy up to dataLen bytes out of the outbox buffer into 'data', advancing */
+/* the head past them. Returns the number of bytes removed. The caller */
+/* must be holding the outbox lock. */
+static inline int __bgOutboxBufferRemove(unsigned char* data,
+ unsigned int dataLen)
+{
+ int copied;
+
+ for (copied = 0;
+ bgc.outboxHead != bgc.outboxTail && copied < dataLen;
+ copied++) {
+ data[copied] = bgc.outboxBuff[bgc.outboxHead];
+ BG_OUTBOX_HEAD_INCREMENT(1);
+ }
+
+ return copied;
+}
+
+
+/* Search for the end of the line, starting at the specified index for the specified maximum length. */
+/* The end of a line is defined by the presence of a newline character or the RAS magic character or */
+/* the end of the buffer. The number of bytes in the line are returned and 'index' is set to the */
+/* buffer index of the last character in the line. If no line can be found zero is returned and */
+/* 'index' is set to the buffer index of the last character examined. The caller must ensure that */
+/* the outbox is locked. */
+/* (Fixed: stray empty statement ';;' removed; logic unchanged.) */
+inline static int __bgOutboxBuffFindEOL(unsigned int* index, unsigned int maxLen) {
+ int rc;
+ int i = *index;
+ int limit;
+ int foundRAS = 0;
+
+ /* Determine the limit of the search: maxLen bytes, the physical end of
+ * the buffer, or one byte before the tail, whichever comes first. */
+ limit = (*index + maxLen - 1 < BG_OUTBOX_BUFF_SIZE - 1 ? *index + maxLen - 1 : BG_OUTBOX_BUFF_SIZE - 1);
+ if (bgc.outboxTail > *index && limit > bgc.outboxTail -1)
+ limit = bgc.outboxTail - 1;
+
+ /* Search for a newline, stopping early at an embedded RAS message. */
+ while (i < limit && bgc.outboxBuff[i] != '\n') {
+ if (bgc.outboxBuff[i] == BG_RAS_MAGIC_CHAR) {
+ unsigned char nextChar = bgc.outboxBuff[(i+1) % BG_OUTBOX_BUFF_SIZE];
+
+ if ((nextChar == BG_RAS_TYPE_BINARY || nextChar == BG_RAS_TYPE_ASCII) &&
+ (i+1) % BG_OUTBOX_BUFF_SIZE != bgc.outboxTail) {
+ foundRAS = 1;
+ break;
+ }
+ }
+ i++;
+ }
+ if (bgc.outboxBuff[i] == '\n') {
+ /* Found the end of a line. */
+ rc = i - *index + 1;
+ *index = i;
+ } else if (foundRAS) {
+ /* Ran into a RAS message so end the line. */
+ rc = i - *index;
+ *index = i - 1;
+ } else {
+ /* Reached the search limit. */
+ rc = 0;
+ *index = i;
+ }
+
+ return rc;
+}
+
+
+/* Send any buffered messages so long as the outbox is ready. This function assumes that the caller is */
+/* holding the outbox buffer lock. */
+int __bgFlushOutboxMsgs(void)
+{
+ int rc = 0;
+
+ /* Send buffered outbox messages as long as there is something to send and the mailbox is ready. */
+ while (bgc.outboxHead != bgc.outboxTail && !bluegene_testForOutboxCompletion()) {
+ unsigned char nextChar = bgc.outboxBuff[(bgc.outboxHead + 1) % BG_OUTBOX_BUFF_SIZE];
+
+ /* We have a message to send. Is it RAS or a console message? */
+ if (bgc.outboxBuff[bgc.outboxHead] == BG_RAS_MAGIC_CHAR &&
+ (nextChar == BG_RAS_TYPE_BINARY || nextChar == BG_RAS_TYPE_ASCII) &&
+ (__bgOutboxBufferUsed() >= sizeof(bg_ras) + 2)) {
+ /* Send a RAS message to the outbox. */
+ bg_ras ras;
+
+ /* Copy the RAS information out of the buffer into a form we can easily deal with. */
+ /* (Return value ignored: the size check above guarantees sizeof(ras) */
+ /* bytes are buffered.) */
+ BG_OUTBOX_HEAD_INCREMENT(2);
+ __bgOutboxBufferRemove((unsigned char*) &ras, sizeof(ras));
+
+ /* Send the RAS. */
+ do {
+ if (nextChar == BG_RAS_TYPE_BINARY) {
+ /* Send binary RAS to the outbox. */
+ bgc.outboxMsgSent = !bluegene_writeRASEvent_nonBlocking(ras.comp, ras.subcomp, ras.code,
+ ras.length / sizeof(int), (int*) ras.data);
+ } else if (nextChar == BG_RAS_TYPE_ASCII) {
+ /* Send ASCII RAS. */
+ int sent = bluegene_writeRASString_nonBlocking(ras.comp, ras.subcomp, ras.code, ras.data);
+
+ bgc.outboxMsgSent = (sent == 0 || sent == -2);
+ } else {
+ bgprintf("Unknown RAS msg type %d\n", nextChar);
+ break;
+ }
+ } while (!bgc.outboxMsgSent && bgc.outboxRetry++ < BG_OUTBOX_MAX_RETRY);
+ if (!bgc.outboxMsgSent) {
+ bgprintf("Unable to send RAS (0x%02x 0x%02x 0x%02x\n", ras.comp, ras.subcomp, ras.code);
+ /* Report the failure to the caller. (A local 'int rc' previously */
+ /* shadowed the function's return code here, so this -EIO was */
+ /* silently discarded.) */
+ rc = -EIO;
+ }
+ bgc.outboxRetry = 0;
+ } else {
+ /* Send console messages. */
+ unsigned int EOL = bgc.outboxHead;
+ unsigned int msgLen = 0;
+ unsigned int len;
+
+ /* Group lines into an outbox-sized block of lines. */
+ while (EOL != bgc.outboxTail && msgLen < BG_OUTBOX_MSG_SIZE &&
+ (len = __bgOutboxBuffFindEOL(&EOL, BG_OUTBOX_MSG_SIZE - msgLen)) > 0) {
+ /* Found another line. Append it to the outbox message. */
+ EOL = (EOL+1) % BG_OUTBOX_BUFF_SIZE;
+ msgLen += len;
+ }
+
+ /* Determine if there are complete lines to print or if we should print a partial line. */
+ if (!msgLen) {
+ unsigned int bytesAvailable = EOL - bgc.outboxHead + 1;
+
+ if (bytesAvailable == BG_OUTBOX_MSG_SIZE || bgc.outboxMsgAge++ >= BG_OUTBOX_MAX_AGE) {
+ /* Either we have a full outbox message or output is too old. Send it now. */
+ msgLen = bytesAvailable;
+ } else {
+ rc = -EAGAIN; // wait for more output
+ break;
+ }
+ }
+
+ /* Send any outbox message data. */
+ if (msgLen) {
+ bgc.outboxMsgSent = !bluegene_writeToMailboxConsole_nonBlocking(bgc.outboxBuff+bgc.outboxHead, msgLen);
+ if (bgc.outboxMsgSent || bgc.outboxRetry++ > BG_OUTBOX_MAX_RETRY) {
+ BG_OUTBOX_HEAD_INCREMENT(msgLen);
+ bgc.outboxMsgAge = bgc.outboxRetry = 0;
+ rc = (bgc.outboxMsgSent ? rc + 1 : -EIO);
+ } else {
+ rc = -EAGAIN;
+ }
+ }
+ }
+ }
+
+ /* If a message was sent (now or during a past call) then check to see if the message has been */
+ /* taken so that we lower outbox attention ASAP. */
+ if (bgc.outboxMsgSent && !bluegene_testForOutboxCompletion())
+ bgc.outboxMsgSent = 0;
+
+ /* If there is something to send but the outbox wasn't ready then return -EWOULDBLOCK. */
+ if (!rc && bgc.outboxHead != bgc.outboxTail)
+ rc = -EWOULDBLOCK;
+
+ return rc;
+}
+
+
+/*
+ * Locked wrapper around __bgFlushOutboxMsgs(): acquires the outbox
+ * spinlock (IRQ-safe), flushes any buffered mailbox output, then
+ * releases the lock.  Returns whatever the unlocked flush returns.
+ */
+inline int bgFlushOutboxMsgs(void)
+{
+    unsigned long irqState;
+    int result;
+
+    spin_lock_irqsave(&bgc.outboxLock, irqState);
+    result = __bgFlushOutboxMsgs();
+    spin_unlock_irqrestore(&bgc.outboxLock, irqState);
+
+    return result;
+}
+
+
+/*
+ * TTY write entry point: queue a console message into the outbox ring
+ * buffer under the outbox spinlock.  Returns the buffer-append result
+ * (bytes accepted), or 0 for an empty/negative-length message.
+ */
+int bgWriteConsoleMsg(struct tty_struct* tty,
+                      const unsigned char* msg,
+                      int msgLen)
+{
+    unsigned long irqState;
+    int appended;
+
+    if (msgLen <= 0)
+        return 0;
+
+    /* Copy the message to the buffer (wrapping if necessary) with the outbox locked. */
+    spin_lock_irqsave(&bgc.outboxLock, irqState);
+    appended = __bgOutboxBufferAppend((char*) msg, (unsigned int) msgLen);
+    spin_unlock_irqrestore(&bgc.outboxLock, irqState);
+
+    return appended;
+}
+
+
+/* Add a binary RAS event to the outbox buffer. If the buffer is full this function flushes */
+/* outbox messages to free buffer space. */
+/* NOTE(review): the flush loop below spins while holding outboxLock with IRQs disabled; */
+/* if the outbox hardware never drains this never terminates -- confirm acceptable. */
+/* Returns the result of __bgOutboxBufferAppend(). */
+int bgWriteRasEvent(unsigned int component,
+ unsigned int subcomponent,
+ unsigned int errCode,
+ unsigned int data[],
+ unsigned int dataLen)
+{
+ int rc = 1;
+ unsigned long flags;
+ bg_ras ras;
+
+ /* Lock the outbox buffer. */
+ spin_lock_irqsave(&bgc.outboxLock, flags);
+
+ /* If insufficient buffer space exists then flush outbox messages until we free enough space. */
+ while (__bgOutboxBufferFree() < sizeof(ras) + 2)
+ __bgFlushOutboxMsgs();
+
+ /* Initialize the RAS structure.  Payload is clamped to sizeof(ras.data). */
+ ras.comp = component;
+ ras.subcomp = subcomponent;
+ ras.code = errCode;
+ ras.length = (dataLen <= sizeof(ras.data) ? dataLen : sizeof(ras.data));
+ memcpy(ras.data, (char*) data, ras.length);
+
+ /* Copy the RAS information to the outbox buffer: magic marker byte, type byte, then the struct. */
+ bgc.outboxBuff[bgc.outboxTail] = BG_RAS_MAGIC_CHAR;
+ BG_OUTBOX_TAIL_INCREMENT(1);
+ bgc.outboxBuff[bgc.outboxTail] = BG_RAS_TYPE_BINARY;
+ BG_OUTBOX_TAIL_INCREMENT(1);
+ rc = __bgOutboxBufferAppend((unsigned char*) &ras, sizeof(ras));
+
+ /* Unlock the outbox buffer. */
+ spin_unlock_irqrestore(&bgc.outboxLock, flags);
+
+ return rc;
+}
+
+
+/*
+ * Add an ASCII RAS event to the outbox buffer.  If the buffer is full this
+ * function flushes outbox messages to free buffer space (spinning under the
+ * outbox lock until space is available).
+ *
+ * The string is truncated so that it plus its NUL terminator fits in
+ * ras.data.  Returns the result of __bgOutboxBufferAppend().
+ */
+int bgWriteRasStr(unsigned int component,
+                  unsigned int subcomponent,
+                  unsigned int errCode,
+                  char* str,
+                  unsigned int strLen)
+{
+    int rc = 1;
+    unsigned long flags;
+    bg_ras ras;
+
+    /* Lock the outbox buffer. */
+    spin_lock_irqsave(&bgc.outboxLock, flags);
+
+    /* If insufficient buffer space exists then flush outbox messages until we free enough space. */
+    while (__bgOutboxBufferFree() < sizeof(ras) + 2)
+        __bgFlushOutboxMsgs();
+
+    /* Initialize the RAS structure. */
+    ras.comp = component;
+    ras.subcomp = subcomponent;
+    ras.code = errCode;
+    /* BUGFIX: clamp must leave room for the terminating NUL.  The old test used '>',
+     * so strLen == sizeof(ras.data) slipped through and the NUL store below wrote
+     * one byte past the end of ras.data. */
+    if (!strLen || strLen >= sizeof(ras.data))
+        strLen = sizeof(ras.data) - 1;
+    for (ras.length = 0; *str && ras.length < strLen; str++, ras.length++)
+        ras.data[ras.length] = *str;
+    ras.data[ras.length] = '\0';
+
+    /* Copy the RAS information to the outbox buffer: magic marker, ASCII type, then the struct. */
+    bgc.outboxBuff[bgc.outboxTail] = BG_RAS_MAGIC_CHAR;
+    BG_OUTBOX_TAIL_INCREMENT(1);
+    bgc.outboxBuff[bgc.outboxTail] = BG_RAS_TYPE_ASCII;
+    BG_OUTBOX_TAIL_INCREMENT(1);
+    rc = __bgOutboxBufferAppend((unsigned char*) &ras, sizeof(ras));
+
+    /* Unlock the outbox buffer. */
+    spin_unlock_irqrestore(&bgc.outboxLock, flags);
+
+    return rc;
+}
+
+
+/* TTY open: on the first open, cache the tty so mailbox input can be
+ * delivered to it, and point driver_data at our device state.
+ * NOTE(review): bgc.tty is written here without taking bgc.ttyLock while
+ * bluegenecons_rcv() reads it under that lock -- confirm the race is benign. */
+static int bluegenecons_open(struct tty_struct *tty, struct file * filp)
+{
+ if (tty->count == 1) {
+ bgc.tty = tty;
+ tty->driver_data = &bgc;
+ }
+
+ return 0;
+}
+
+/* TTY close: forget the cached tty when the last user closes it. */
+static void bluegenecons_close(struct tty_struct *tty, struct file * filp)
+{
+    if (!tty || tty->count != 1)
+        return;
+
+    bgc.tty = NULL;
+}
+
+
+#define BLUEGENECONS_MAGIC_SYSRQ_KEY (15) /* ^O */
+
+/*
+ * Deliver received mailbox console input to the TTY layer.  A ^O (0x0f)
+ * prefix arms sysrq mode: the character that follows is handed to
+ * handle_sysrq() instead of being queued as terminal input.
+ */
+static void bluegenecons_rcv(char *msg, int msglen)
+{
+    struct tty_struct *tty;
+    unsigned long flags;
+    static int sysrq_mode;
+
+    spin_lock_irqsave(&bgc.ttyLock, flags);
+    tty = bgc.tty;
+    if (tty) {
+        while (msglen) {
+            int i;
+            int count = tty_buffer_request_room(tty, msglen);
+
+            /* BUGFIX: if the flip buffer has no room, count is 0 and the old
+             * code spun here forever with IRQs off.  Drop the remainder. */
+            if (count <= 0)
+                break;
+
+            for (i = 0; i < count; i++) {
+                if (sysrq_mode) {
+                    handle_sysrq(msg[i], tty);
+                    sysrq_mode = 0;
+                } else if (msg[i] == BLUEGENECONS_MAGIC_SYSRQ_KEY)
+                    sysrq_mode = 1;
+                else
+                    tty_insert_flip_char(tty, msg[i], 0);
+            }
+            msglen -= count;
+            msg += count;
+            tty_flip_buffer_push(tty);
+        }
+    }
+    spin_unlock_irqrestore(&bgc.ttyLock, flags);
+}
+
+
+/*
+ * Mailbox polling kernel thread.
+ *
+ * This thread wakes up at intervals to check for inbound mailbox messages
+ * and it will send waiting outbound messages if the outbound box is free.
+ * Runs until kthread_stop() is called.  Always returns 0.
+ */
+int kmboxd(void *arg)
+{
+ __set_current_state(TASK_RUNNING);
+ do {
+ int rc;
+
+ /* If there is anything in the inbox read it now. */
+ if (bluegene_testInboxAttention()) {
+ static char buffer[512];
+ int len;
+
+ /* Fetch any input */
+ len = bluegene_readFromMailboxConsole(buffer, sizeof(buffer));
+ if (len > 0)
+ bluegenecons_rcv(buffer, len);
+ }
+
+ /* Flush any console output that is buffered. */
+ rc = bgFlushOutboxMsgs();
+
+ /* If outbox buffer data was written then wake any TTY writer */
+ /* that is waiting.  NOTE(review): direct tty->ldisc.ops access is */
+ /* 2.6.29-specific; later kernels require tty_ldisc_ref(). */
+ if (rc > 0 && bgc.tty) {
+ if ((bgc.tty->flags & (1 << TTY_DO_WRITE_WAKEUP))
+ && bgc.tty->ldisc.ops->write_wakeup)
+ (bgc.tty->ldisc.ops->write_wakeup)(bgc.tty);
+ wake_up_interruptible(&bgc.tty->write_wait);
+ }
+
+ /* Condition is constant 0, so this is simply a 10ms interruptible sleep. */
+ wait_event_timeout(bgc.wait, 0, msecs_to_jiffies(10));
+ } while (!kthread_should_stop());
+
+ return 0;
+}
+
+
+#ifdef CONFIG_MAGIC_SYSRQ
+
+extern void ctrl_alt_del(void);
+
+/*
+ * Worker-thread body for sysrq keys delivered via the mailbox.  Runs the
+ * appropriate userspace script with a minimal environment; for halt ('h')
+ * falls back to ctrl_alt_del() when the script cannot be executed.
+ */
+static int bluegene_do_sysrq(void* data)
+{
+    int key = (int) data;
+    static char* env[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+                           "LD_LIBRARY_PATH=/lib:/usr/lib", NULL };
+
+    if (key == 'h') {
+        /* Halt request: run the shutdown script, else force reboot path. */
+        static char* argv[] = { "/etc/rc.shutdown", NULL };
+
+        kernel_execve(argv[0], argv, env);
+        printk(KERN_EMERG "Failure halting I/O node. Attempting secondary method.\n");
+        ctrl_alt_del();
+    } else if (key == 'x') {
+        /* Reboot request. */
+        static char* argv[] = { "/etc/rc.reboot", NULL };
+
+        kernel_execve(argv[0], argv, env);
+        printk(KERN_EMERG "Failure rebooting I/O node.\n");
+    } else {
+        printk(KERN_EMERG "Unknown sysrq '%c'\n", key);
+    }
+
+    return 0;
+}
+
+
+/* Spawn a kernel thread to service a sysrq key so the caller never blocks;
+ * on thread-creation failure log RAS and, for 'h', fall back to an
+ * immediate ctrl_alt_del(). */
+static void bluegene_handle_sysrq(int key, struct tty_struct *tty)
+{
+    struct task_struct* worker;
+
+    worker = kthread_run(bluegene_do_sysrq, (void*) key, "Process System Request");
+    if (!IS_ERR(worker))
+        return;
+
+    printk(KERN_EMERG "Failure creating sysrq '%c' thread.\n", (char) key);
+    bgWriteRasStr(bg_comp_kernel, bg_subcomp_linux, bg_code_sysrq_thread_create_failure,
+                  "Failure creating sysrq thread.", 0);
+    if (key == 'h')
+        ctrl_alt_del();
+}
+
+/* Sysrq handlers registered for 'h' (halt) and 'x' (reboot) in
+ * bluegenecons_init(); both route through bluegene_handle_sysrq(),
+ * which dispatches on the key value. */
+static struct sysrq_key_op bg_sysrq_halt_op = {
+ .handler = bluegene_handle_sysrq,
+ .help_msg = "Halt",
+ .action_msg = "Halt node"
+};
+
+static struct sysrq_key_op bg_sysrq_reboot_op = {
+ .handler = bluegene_handle_sysrq,
+ .help_msg = "Reboot",
+ .action_msg = "Reboot node"
+};
+#endif
+
+
+/* TTY operations for the BlueGene mailbox console.  write, write_room and
+ * chars_in_buffer are all backed by the shared outbox ring buffer. */
+static struct tty_operations bgcons_ops = {
+ .open = bluegenecons_open,
+ .close = bluegenecons_close,
+ .write = bgWriteConsoleMsg,
+ .write_room = bgOutboxBufferFree,
+ .chars_in_buffer = bgOutboxBufferUsed,
+};
+
+
+/* Read interface not defined so we just return EOF. */
+static int bluegene_rasevent_read(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ /* RAS proc files are write-only; a read always yields 0 bytes. */
+ return 0;
+}
+
+
+/*
+ * /proc write handler for RAS events.  The user supplies a bg_ras payload;
+ * we provide the transport.  'data' selects the interpretation: NULL means
+ * binary detail data, non-NULL means ASCII detail data.
+ * Returns the number of bytes consumed or a negative errno.
+ */
+static int bluegene_rasevent_write(struct file *file, const char *buffer,
+                                   unsigned long len, void *data)
+{
+    bg_ras ras;
+
+    /* Truncate overlong writes; reject writes too short to contain the
+     * fixed header (everything preceding the 'data' payload). */
+    if (len > sizeof(ras))
+        len = sizeof(ras);
+    else if (len < offsetof(bg_ras, data))
+        return -EIO;
+
+    if (copy_from_user(&ras, buffer, len))
+        return -EFAULT;
+
+    if (!data)
+        bgWriteRasEvent(ras.comp, ras.subcomp, ras.code,
+                        (unsigned int*) ras.data, ras.length);
+    else {
+        /* ASCII detail data was written; guarantee a terminated string
+         * even when the caller supplied no payload at all. */
+        if (!ras.length)
+            ras.data[0] = '\0';
+        bgWriteRasStr(ras.comp, ras.subcomp, ras.code,
+                      ras.data, ras.length);
+    }
+
+    return len;
+}
+
+
+/*
+ * Return the final component of 'path' (the text after the last '/').
+ * BUGFIX: the old version returned NULL when the path contained no '/',
+ * and that NULL was passed straight into create_proc_entry(); a path
+ * with no directory part now yields the whole string.
+ */
+static inline char* entryName(char* path)
+{
+    char* name = path;
+
+    while (*path) {
+        if (*path == '/')
+            name = path + 1;
+        path++;
+    }
+
+    return name;
+}
+
+
+/*
+ * Driver init: allocate and register the TTY driver, hook the sysrq keys,
+ * start the mailbox polling thread, and create the /proc RAS interfaces.
+ * Returns 0 on success or -EIO on any fatal failure.
+ */
+static int __init bluegenecons_init(void)
+{
+    bgc.ttyDriver = alloc_tty_driver(1);
+    if (!bgc.ttyDriver) {
+        char* msg = "Failure allocating BlueGene console driver.";
+
+        bgprintf(msg);
+        bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_tty_alloc_failure, msg);
+        return -EIO;
+    }
+
+    bgc.ttyDriver->owner = THIS_MODULE;
+    bgc.ttyDriver->name = "bgcons";
+    bgc.ttyDriver->name_base = 1;
+    bgc.ttyDriver->major = BLUEGENE_MAJOR;
+    bgc.ttyDriver->minor_start = BLUEGENE_MINOR;
+    bgc.ttyDriver->type = TTY_DRIVER_TYPE_SYSTEM;
+    bgc.ttyDriver->init_termios = tty_std_termios;
+    bgc.ttyDriver->flags = TTY_DRIVER_REAL_RAW;
+    tty_set_operations(bgc.ttyDriver, &bgcons_ops);
+
+    if (tty_register_driver(bgc.ttyDriver)) {
+        char* msg = "Failure registering BlueGene console driver";
+
+        bgprintf(msg);
+        bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_tty_reg_failure, msg);
+        put_tty_driver(bgc.ttyDriver);    /* BUGFIX: driver was leaked on this path */
+        return -EIO;
+    }
+
+#ifdef CONFIG_MAGIC_SYSRQ
+    /* Sysrq h is sent by the control system to halt an ION during free_block */
+    register_sysrq_key('h', &bg_sysrq_halt_op);
+
+    /* Sysrq x is sent by the control system when ION reboot is requested. */
+    register_sysrq_key('x', &bg_sysrq_reboot_op);
+#endif
+
+    /* Kick off the kernel mailbox poll thread. */
+    init_waitqueue_head(&bgc.wait);
+    bgc.kmboxdTask = kthread_run(kmboxd, NULL, "kmboxd");
+    if (IS_ERR(bgc.kmboxdTask)) {
+        char* msg = "Failure creating mailbox processing thread.";
+
+        bgprintf(msg);
+        bluegene_writeRASString(bg_comp_kernel, bg_subcomp_linux, bg_code_mbox_thread_create_failure, msg);
+        tty_unregister_driver(bgc.ttyDriver);    /* BUGFIX: undo the registration above */
+        put_tty_driver(bgc.ttyDriver);
+        return -EIO;
+    }
+
+    /* Create /proc RAS interfaces.  Failure here is non-fatal. */
+    proc_ras = create_proc_entry(entryName(BG_RAS_FILE), S_IFREG | S_IRWXUGO, NULL);
+    if (proc_ras) {
+        proc_ras->nlink = 1;
+        proc_ras->read_proc = (void*) bluegene_rasevent_read;
+        proc_ras->write_proc = (void*) bluegene_rasevent_write;
+        proc_ras->data = (void*) 0; /* binary (not ASCII) RAS */
+    }
+    proc_ras_ascii = create_proc_entry(entryName(BG_RAS_ASCII_FILE), S_IFREG | S_IRWXUGO, NULL);
+    if (proc_ras_ascii) {
+        proc_ras_ascii->nlink = 1;
+        proc_ras_ascii->read_proc = (void*) bluegene_rasevent_read;
+        proc_ras_ascii->write_proc = (void*) bluegene_rasevent_write;
+        proc_ras_ascii->data = (void*) 1; /* ASCII RAS */
+    }
+
+    return 0;
+}
+
+/*
+ * Module exit: stop the mailbox thread, remove the /proc entries and
+ * release the TTY driver.  BUGFIX: the original only removed the /proc
+ * entries, leaving kmboxd running and the tty driver registered.
+ */
+static void __exit bluegenecons_exit(void)
+{
+    /* Stop the polling thread started in bluegenecons_init(). */
+    if (bgc.kmboxdTask && !IS_ERR(bgc.kmboxdTask)) {
+        kthread_stop(bgc.kmboxdTask);
+        bgc.kmboxdTask = NULL;
+    }
+
+    if (proc_ras) {
+        remove_proc_entry(proc_ras->name, NULL);
+        proc_ras = NULL;
+    }
+    if (proc_ras_ascii) {
+        remove_proc_entry(proc_ras_ascii->name, NULL);
+        proc_ras_ascii = NULL;
+    }
+
+    /* Release the TTY driver registered at init time. */
+    if (bgc.ttyDriver) {
+        tty_unregister_driver(bgc.ttyDriver);
+        put_tty_driver(bgc.ttyDriver);
+        bgc.ttyDriver = NULL;
+    }
+}
+
+/*
+ * Low-level console hook: forward printk output into the outbox buffer.
+ */
+static void bluegene_console_write(struct console *co, const char *b, unsigned count)
+{
+    if (count == 0)
+        return;
+
+    bgWriteConsoleMsg(bgc.tty, b, count);
+}
+
+/* console->device hook: report our TTY driver and index 0 so the console
+ * can be associated with /dev/bgcons1. */
+static struct tty_driver *bluegene_console_device(struct console *c, int *ip)
+{
+ *ip = 0;
+ return bgc.ttyDriver;
+}
+
+
+/* Console descriptor registered by bluegene_console_init(); CON_PRINTBUFFER
+ * replays the early printk buffer through bluegene_console_write(). */
+static struct console bgcons = {
+ .name = "bgcons",
+ .write = bluegene_console_write,
+ .device = bluegene_console_device,
+ .flags = CON_PRINTBUFFER,
+ .index = 0,
+};
+
+/* Early console registration (console_initcall). Always succeeds. */
+int __init bluegene_console_init(void)
+{
+ register_console(&bgcons);
+
+ return 0;
+}
+
+
+module_init(bluegenecons_init);
+module_exit(bluegenecons_exit);
+console_initcall(bluegene_console_init);
diff --git a/drivers/char/bluegene_networks.c b/drivers/char/bluegene_networks.c
new file mode 100644
index 00000000000000..5e06e0c4609ce3
--- /dev/null
+++ b/drivers/char/bluegene_networks.c
@@ -0,0 +1,202 @@
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/semaphore.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+
+static int bgpnet_add_device(int major, int minor, const char* name, unsigned long long base);
+static int bgpnet_device_open(struct inode *inode, struct file *filp);
+static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *);
+static int bgpnet_device_release(struct inode *inode, struct file * filp);
+static int bgpnet_device_ioctl(struct inode *inode, struct file * filp,
+ unsigned int cmd, unsigned long arg);
+
+
+#define BGP_COL_MAJOR_NUM 120
+#define BGP_TORUS_MAJOR_NUM 121
+#define BGP_GI_MAJOR_NUM 122
+#define BGP_COL_MINOR_NUMS 2
+#define BGP_TORUS_MINOR_NUMS 2
+#define BGP_GI_MINOR_NUMS 4
+#define _BGP_UA_COL0 (0x6)
+#define _BGP_PA_COL0 (0x10000000)
+#define _BGP_UA_COL1 (0x6)
+#define _BGP_PA_COL1 (0x11000000)
+#define _BGP_UA_TORUS0 (0x6)
+#define _BGP_PA_TORUS0 (0x01140000)
+#define _BGP_UA_TORUS1 (0x6)
+#define _BGP_PA_TORUS1 (0x01150000)
+
+/* Per-device state for one BGP network character device. */
+struct bgpnet_dev
+{
+ int major,minor; /* device major, minor */
+ unsigned long long physaddr; /* physical address */
+ struct task_struct* current; /* process holding device */
+ /* NOTE(review): 'current' is also a kernel macro (get_current()); this field
+  * name only compiles because asm/current.h is not pulled in here -- confirm,
+  * and consider renaming (e.g. 'owner') across this file. */
+ int signum; /* signal to send holding process */
+ wait_queue_head_t read_wq;
+ int read_complete;
+ void *regs; /* mapped regs (only used with col) */
+ struct semaphore sem; /* interruptible semaphore */
+ struct cdev cdev; /* container device? */
+};
+
+
+#define BGP_MAX_DEVICES 8
+static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES];
+static unsigned int bgpnet_num_devices = 0;
+
+
+/* File operations for the BGP network devices: only open/ioctl/release/mmap
+ * are implemented; read/write/poll are deliberately absent (access is via
+ * mmap of the device registers). */
+static struct file_operations bgpnet_device_fops =
+{
+ .owner= THIS_MODULE,
+ .open= bgpnet_device_open,
+ .read = NULL,
+ .write= NULL,
+ .poll= NULL,
+ .ioctl= bgpnet_device_ioctl,
+ .release= bgpnet_device_release,
+ .mmap= bgpnet_device_mmap,
+};
+
+
+/*
+ * Register one character device node for a BGP network unit and, when a
+ * physical address is supplied, map the first 4KB of its registers.
+ * Returns 0 on success or a negative errno.
+ */
+static int bgpnet_add_device(int major,
+                             int minor,
+                             const char* devname,
+                             unsigned long long physaddr)
+{
+    int ret;
+    dev_t devno;
+    struct bgpnet_dev* dev;
+
+    /* BUGFIX: guard the fixed-size device table (was previously unchecked). */
+    if (bgpnet_num_devices >= BGP_MAX_DEVICES)
+        return -ENOSPC;
+
+    dev = &bgpnet_devices[bgpnet_num_devices];
+
+    /* initialize struct */
+    init_MUTEX(&dev->sem);
+    dev->major = major;
+    dev->minor = minor;
+    dev->physaddr = physaddr;
+    init_waitqueue_head(&dev->read_wq);
+    dev->read_complete = 0;
+    if (physaddr) {
+        dev->regs = ioremap(physaddr, 4096);
+        if (!dev->regs) {
+            /* BUGFIX: ioremap failure was previously ignored. */
+            printk(KERN_WARNING "bgpnet: couldn't map regs for device (%d,%d)\n",
+                   major, minor);
+            return -ENOMEM;
+        }
+    }
+    devno = MKDEV(major, minor);
+
+    /* register i.e., /proc/devices */
+    ret = register_chrdev_region(devno, 1, (char *)devname);
+    if (ret) {
+        printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d) err=%d\n",
+               major, minor, ret);
+        return ret;
+    }
+
+    /* add cdev */
+    cdev_init(&dev->cdev, &bgpnet_device_fops);
+    dev->cdev.owner = THIS_MODULE;
+    dev->cdev.ops = &bgpnet_device_fops;
+    ret = cdev_add(&dev->cdev, devno, 1);
+    if (ret) {
+        printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d), err=%d\n",
+               major, minor, ret);
+        /* BUGFIX: undo the region registration taken above (previously leaked). */
+        unregister_chrdev_region(devno, 1);
+        return ret;
+    }
+
+    /* signal to pass to owning process, should be altered using ioctl */
+    dev->signum = -1;
+
+    bgpnet_num_devices++;
+
+    return 0;
+}
+
+
+/* Open: record the opening task as the device owner and stash the device
+ * in the file's private data.
+ * NOTE(review): the semaphore is taken and immediately released before the
+ * unprotected writes below, so it only briefly serializes with release();
+ * confirm this weak exclusion is intentional. */
+static int bgpnet_device_open (struct inode *inode, struct file *filp)
+{
+ struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev);
+
+ if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+ up(&dev->sem);
+
+ dev->current=current;
+ filp->private_data = (void*) dev;
+
+ return 0;
+}
+
+
+
+/* mmap: map the device's physical register space into the caller as
+ * uncached I/O memory.
+ * NOTE(review): vma->vm_pgoff is ignored and vsize is not bounded against
+ * the 4KB region that bgpnet_add_device() ioremaps -- confirm callers only
+ * map offset 0 with the expected size. */
+static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ unsigned long vsize = vma->vm_end - vma->vm_start;
+ struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data;
+ int ret = -1;
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_flags |= VM_IO;
+ vma->vm_flags |= VM_RESERVED;
+
+ /* A device with no physical address (physaddr == 0) cannot be mapped;
+  * ret stays -1 and we fall through to the failure path. */
+ if (device->physaddr != 0)
+ ret = remap_pfn_range(vma,
+ vma->vm_start,
+ device->physaddr >> PAGE_SHIFT,
+ vsize,
+ vma->vm_page_prot);
+
+ if (ret)
+ printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n",
+ device->major, device->minor);
+
+ return ret? -EAGAIN :0;
+}
+
+
+/* Release: clear the owning task pointer under the device semaphore. */
+static int bgpnet_device_release (struct inode *inode, struct file * filp)
+{
+    struct bgpnet_dev *dev = (struct bgpnet_dev *) filp->private_data;
+
+    /* Ensure exclusive access */
+    if (down_interruptible(&dev->sem))
+        return -ERESTARTSYS;
+    dev->current = NULL;
+    up(&dev->sem);
+
+    return 0;
+}
+
+
+/* ioctl: placeholder -- no commands are implemented yet; always succeeds. */
+static int bgpnet_device_ioctl (struct inode *inode,
+ struct file * filp,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ return 0;
+}
+
+
+/*
+ * Module init: create character devices for the two collective (tree)
+ * virtual channels and the two torus groups.  Each 36-bit device base
+ * address is assembled from an (upper:lower) physical address pair.
+ * BUGFIX: registration failures are now propagated instead of ignored.
+ */
+static int __init bgpnet_module_init(void)
+{
+    int rc;
+    unsigned long long tr0, tr1, ts0, ts1;
+
+    tr0 = ((unsigned long long) _BGP_UA_COL0 << 32) + _BGP_PA_COL0;
+    tr1 = ((unsigned long long) _BGP_UA_COL1 << 32) + _BGP_PA_COL1;
+    ts0 = ((unsigned long long) _BGP_UA_TORUS0 << 32) + _BGP_PA_TORUS0;
+    ts1 = ((unsigned long long) _BGP_UA_TORUS1 << 32) + _BGP_PA_TORUS1;
+
+    rc = bgpnet_add_device(BGP_COL_MAJOR_NUM, 0, "bgptree_vc0", tr0);
+    if (!rc)
+        rc = bgpnet_add_device(BGP_COL_MAJOR_NUM, 1, "bgptree_vc1", tr1);
+    if (!rc)
+        rc = bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 0, "bgptorus_g0", ts0);
+    if (!rc)
+        rc = bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 1, "bgptorus_g1", ts1);
+
+    return rc;
+}
+
+
+module_init(bgpnet_module_init);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 62d732a886f199..411fd60041755a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2628,6 +2628,166 @@ config QLGE
To compile this driver as a module, choose M here: the module
will be called qlge.
+config BGP_COLLECTIVE
+ tristate "BlueGene Ethernet-on-Collective support"
+ default y if BGP=y
+ depends on BGP
+ help
+ This driver supports the BlueGene Ethernet-over-collective
+ controller, for running IP between IO and Compute nodes.
+
+config BGP_COLLECTIVE_IP_CHECKSUM
+ bool "Request IP-layer software checksum on the BGP collective"
+ default y
+ depends on BGP_COLLECTIVE
+ help
+ The BlueGene collective network has hardware CRC-and-retry, which is
+ stronger than an IP checksum, but you can enable IP checksumming as well.
+
+config BGP_COLLECTIVE_NAPI
+ tristate "BlueGene Ethernet-on-Collective NAPI support"
+ default n
+ depends on BGP
+ help
+ This configures the BGP collective driver to use NAPI interrupt mitigation
+
+config BGP_DMA
+ tristate "BlueGene Torus DMA support"
+ default y if BGP=y
+ depends on BGP
+ help
+ This driver supports the BlueGene torus DMA unit.
+ You will need it if you want to use BGP_TORUS
+
+config BGP_TORUS
+ tristate "BlueGene Ethernet-on-Torus support"
+ default y if BGP=y
+ depends on BGP
+ help
+ This driver supports the BlueGene Ethernet-over-torus
+ controller, for running IP amongst Compute nodes
+
+config BGP_TORUS_DIAGNOSTICS
+ bool "Diagnostics for BlueGene Ethernet-on-Torus"
+ default y if BGP=y
+ depends on BGP
+ help
+ This inserts diagnostics into the TCP layers, to support
+ optimisation of the IP-on-BlueGene-Torus code
+
+config BGP_FRANKENTORUS
+ tristate "BlueGene Ethernet-on-Torus support, vrnic-style"
+ default n
+ depends on BGP
+ help
+ This driver supports the BlueGene Ethernet-over-torus vrnic
+ controller, for running IP amongst Compute nodes
+
+config BGP_TORUS_IP_CHECKSUM
+ bool "Request IP-layer software checksum on the BGP torus"
+ default y
+ depends on BGP_TORUS
+ help
+ The BlueGene torus network has hardware CRC-and-retry, which is
+ stronger than an IP checksum, but you can enable IP checksumming as well.
+
+config BGP_RECEPTION_MEMORY_FIFO_SHIFT
+ int "log2(BlueGene torus software reception FIFO size)"
+ depends on BGP
+ default "24"
+ help
+ FIFO should be somewhere between 64kB and 32MB
+
+config BGP_TORUS_ADAPTIVE_ROUTING
+ tristate "BlueGene Ethernet-on-Torus with adaptive routing"
+ default n
+ depends on BGP
+ help
+ Support for IP with adaptive packet routing on the torus (experimental)
+
+config BGP_VRNIC
+ tristate "BlueGene virtual RNIC support"
+ default m if BGP=y
+ depends on BGP
+ help
+ This driver supports the BlueGene virtual RNIC
+ controller, for running test cases against the vRNIC
+
+config BGP_VRNIC_START
+ hex "Real address start of BGP VRNIC. Linux not to inadvertently use real store in this region"
+ depends on BGP_VRNIC
+ default "0xe0000000"
+
+config BGP_VRNIC_SIZE
+ hex "Number of bytes of memory put over to BGP VRNIC. Linux not to inadvertently use real store in this region"
+ depends on BGP_VRNIC
+ default "0x10000000"
+
+config BGP_STATISTICS
+ tristate "BlueGene Statistics support"
+ default y if BGP=y
+ depends on BGP
+ help
+ This driver supports gathering of statistics related to
+ BlueGene/P hardware
+
+
+config BGP_E10000
+	tristate "BlueGene on-chip Ethernet support"
+	default y if BGP=y
+	depends on BGP
+	help
+	  This driver supports the BlueGene 10Gb on-chip Ethernet
+	  controller.
+
+config BGP_E10000_RXB
+ int "Total size in bytes of receive buffers (1MB maximum)"
+ depends on BGP_E10000
+ default "1048576"
+
+config BGP_E10000_TXB
+ int "Number of transmit buffers"
+ depends on BGP_E10000
+ default "4096"
+
+config BGP_E10000_IP_CHECKSUM
+ bool "Enable HW checksum for TCP/UDP IPv4 traffic"
+ depends on BGP_E10000
+ default y
+
+config BGP_E10000_NAPI
+ bool "Enable 'new API' network interface"
+ depends on BGP_E10000
+ default n
+
+config BGP_E10000_EMAC_LOOPBACK
+ bool "Enable MAC loopback mode"
+ depends on BGP_E10000
+ help
+ This ties the output path directly to the input path at the MAC level.
+ default n
+
+config BGP_E10000_PHY_LOOPBACK
+ bool "Enable PHY loopback mode"
+ depends on BGP_E10000
+ help
+ This ties the output path directly to the input path in the PHY.
+ default n
+
+config BGP_E10000_DBG
+ bool "Debug enablement"
+ depends on BGP_E10000
+ help
+ This enables debug output.
+ default n
+
+config BGP_E10000_DBG_LEVEL
+ int "Debug level"
+ depends on BGP_E10000_DBG
+ help
+ This sets the amount of debug output.
+ default 57
+
source "drivers/net/sfc/Kconfig"
source "drivers/net/benet/Kconfig"
@@ -3092,6 +3252,23 @@ config NETPOLL_TRAP
config NET_POLL_CONTROLLER
def_bool NETPOLL
+config TCP_HIATUS_COUNTS
+ bool "TCP output hiatus counts"
+ default n
+ help
+ This option counts the number of times that TCP output is held back
+ by reason (e.g. 'congestion window filled'). It is useful if you are
+ trying to exploit fast networks, to help pin down what is limiting
+ the transfer rate.
+
+config TCP_CONGESTION_OVERRIDES
+ bool "TCP output congestion overrides"
+ default n
+ help
+ This option places controls in sysfs so that TCP congestion parameters
+ can be overridden system-wide; e.g. turning Nagle off
+
+
config VIRTIO_NET
tristate "Virtio network driver (EXPERIMENTAL)"
depends on EXPERIMENTAL && VIRTIO
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 471baaff229ff4..31b9f4376d18c8 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -5,6 +5,13 @@
obj-$(CONFIG_E1000) += e1000/
obj-$(CONFIG_E1000E) += e1000e/
obj-$(CONFIG_IBM_NEW_EMAC) += ibm_newemac/
+obj-$(CONFIG_BGP_E10000) += bgp_e10000/
+obj-$(CONFIG_BGP_COLLECTIVE) += bgp_collective/
+obj-$(CONFIG_BGP_TORUS) += bgp_torus/
+obj-$(CONFIG_BGP_FRANKENTORUS) += bgp_frankentorus/
+obj-$(CONFIG_BGP_VRNIC) += bgp_vrnic/
+obj-$(CONFIG_BGP_STATISTICS) += bgp_statistics/
+# obj-$(CONFIG_BLUEGENE_SOCKETS) += bgp_sockets/
obj-$(CONFIG_IGB) += igb/
obj-$(CONFIG_IXGBE) += ixgbe/
obj-$(CONFIG_IXGB) += ixgb/
diff --git a/drivers/net/bgp_collective/Makefile b/drivers/net/bgp_collective/Makefile
new file mode 100644
index 00000000000000..29fbe0adf6b54d
--- /dev/null
+++ b/drivers/net/bgp_collective/Makefile
@@ -0,0 +1,7 @@
+# Makefile for BlueGene collective and torus driver
+
+EXTRA_CFLAGS += -I$(BGPHOME)/bgp/arch/include -Iarch/powerpc/syslib/bgdd/ -Iarch/ppc/syslib/bgdd/ -g -dA -D__LINUX_KERNEL__
+
+bgp_collective-y := bgcol.o bgnet.o
+
+obj-$(CONFIG_BGP_COLLECTIVE) += bgp_collective.o
diff --git a/drivers/net/bgp_collective/bgcol.c b/drivers/net/bgp_collective/bgcol.c
new file mode 100644
index 00000000000000..7e10a137839ed3
--- /dev/null
+++ b/drivers/net/bgp_collective/bgcol.c
@@ -0,0 +1,3330 @@
+/*********************************************************************
+ *
+ * Description: Blue Gene low-level driver for collective network
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors:
+ * Chris Ward <tjcw@uk.ibm.com>
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ * Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * The protocol implemented here will send a 'jumbo' (9000 byte) frame
+ * in 38 packets, i.e. 240 bytes payload + 16 bytes link header per packet.
+ * The measured throughput was 4325 Mbit/sec on one IO link
+ *
+ * It is logically possible to send a 'jumbo' frame in 36 packets; to
+ * do this you need to pack 255 bytes of payload + 1 byte of link
+ * header per packet (you need to at least indicate which node has sent
+ * the packet); you probably want to do this by 'trampling' the first
+ * byte of each packet, sending a 'correction' byte sequence at the
+ * end of the frame, and having the receiver demultiplex and correct
+ * the frames.
+ * This should achieve 4565 Mbit/sec
+ *
+ * If you were to drive the link with an MTU of close to 65535, you
+ * could send a 65270-byte frame in 256 packets, which should achieve
+ * 4655 Mbit/sec.
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_arp.h>
+#include <net/arp.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/prom.h>
+
+
+#include "bglink.h"
+#include "bgcol.h"
+#include "bgnet.h"
+#include "bgp_dcr.h"
+#include "ppc450.h"
+
+#include <asm/bluegene.h>
+
+#define DRV_NAME "bgcol"
+#define DRV_VERSION "1.0"
+#define DRV_DESC "IBM Blue Gene Collective Driver"
+
+MODULE_DESCRIPTION(DRV_DESC);
+MODULE_AUTHOR("IBM");
+MODULE_LICENSE("GPL");
+
+/* SA_ONSTACK is deprecated, but its replacement has not made it into MCP yet. Compatibility ... */
+#if !defined(IRQF_DISABLED)
+#define IRQF_DISABLED SA_ONSTACK
+#endif
+
+/* configuration selector macros */
+#define COLLECTIVE_RECEIVE_WITH_SLIH
+/* #define COLLECTIVE_DELIVER_VIA_TASKLET */
+/* #define COLLECTIVE_BREAK_ON_FRAME */
+#define COLLECTIVE_TRANSMIT_WITH_SLIH
+#define COLLECTIVE_TRANSMIT_WITH_FLIH
+#define COLLECTIVE_XMITTER_FREES
+#define COLLECTIVE_DUPLEX_SLIH
+#define COLLECTIVE_ONEPASS_TXRX
+#define BGP_COL_STATUS_VISIBILITY
+
+
+extern void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ;
+
+/* For diagnosis of certain link sequencing problems, it can be useful to keep a trail of */
+/* recently-arrived link headers. Set this macro if you want a trail kept */
+/* #define KEEP_LNKHDR_TRAIL */
+enum {
+ k_lnkhdr_trail_display_length = 50, /* Link header amount of trail to display */
+ k_lnkhdr_trail_length = 64, /* Link header ring buffer length, next power of 2 above k_lnkhdr_trail_display_length. */
+ k_lnhhdr_ffdc_limit = 20 /* First-failure-data-capture limit, we want to catch first failures and not saturate the logging system */
+};
+
+/* For diagnostics, track the last thing that we knew happened to the bgcol in interrupt mode */
+enum {
+ k_bgcolaction_none ,
+ k_bgcolaction_xmit ,
+ k_bgcolaction_xmit_enable ,
+ k_bgcolaction_xmit_irq ,
+ k_bgcolaction_xmit_irq_disable
+
+};
+
+struct bglink_proto * proto_array[k_link_protocol_limit] ;
+
+/* static int bgcolaction ; */
+
+extern int e10000_diag_count ;
+
+
+/* #define CONFIG_BLUEGENE_COLLECTIVE_TRACE */
+
+/* #define REQUIRE_TRACE */
+
+#include <linux/KernelFxLog.h>
+
+#include "../bgp_network/bgp_net_traceflags.h"
+
+/* #if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) */
+/* static int bgcol_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+int bgcol_debug_tracemask = k_t_init | k_t_request | k_t_protocol ;
+/* int bgcol_debug_tracemask = 0xffffffff ; */
+/* #endif */
+
+/* Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */
+#define COMPILED_TRACEMASK (0xffffffff-k_t_detail-k_t_fifocontents)
+/* #define COMPILED_TRACEMASK (k_t_error) */
+
+#define XTRACEN(i,x...)
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...) KernelFxLog(1,x)
+#define TRACE1(x...) KernelFxLog(1,x)
+#define TRACE2(x...) KernelFxLog(1,x)
+#define TRACEN(i,x...) KernelFxLog(1,x)
+#define TRACED(x...) KernelFxLog(1,x)
+#define TRACES(x...) KernelFxLog(1,x)
+#elif defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+#define TRACE(x...) KernelFxLog(bgcol_debug_tracemask & k_t_general,x)
+#define TRACE1(x...) KernelFxLog(bgcol_debug_tracemask & k_t_lowvol,x)
+#define TRACE2(x...) KernelFxLog(bgcol_debug_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgcol_debug_tracemask & (COMPILED_TRACEMASK & (i)),x)
+#define TRACED(x...) KernelFxLog(1,x)
+#define TRACES(x...) KernelFxLog(1,x)
+#else
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+#define _BGP_DCR_COL 0
+
+#define FRAGMENT_TIMEOUT (HZ/10)
+
+#define COL_LNKHDRLEN (sizeof(struct bglink_hdr_col))
+#define COL_FRAGPAYLOAD (COL_PAYLOAD - COL_LNKHDRLEN)
+#define COL_SKB_ALIGN 16
+
+
+#define BGP_COL_MAJOR_NUM 120
+#define BGP_TORUS_MAJOR_NUM 121
+#define BGP_GI_MAJOR_NUM 122
+#define BGP_COL_MINOR_NUMS 2
+#define BGP_TORUS_MINOR_NUMS 2
+#define BGP_GI_MINOR_NUMS 4
+#define _BGP_UA_COL0 (0x6)
+#define _BGP_PA_COL0 (0x10000000)
+#define _BGP_UA_COL1 (0x6)
+#define _BGP_PA_COL1 (0x11000000)
+#define _BGP_UA_TORUS0 (0x6)
+#define _BGP_PA_TORUS0 (0x01140000)
+#define _BGP_UA_TORUS1 (0x6)
+#define _BGP_PA_TORUS1 (0x01150000)
+
+/*
+ * 'Oversized' skbuffs are an attempt to increase throughput on the collective interface by arranging for
+ * 2 cores to work together on pulling data and distributing it. See commentary in bgnet.c as to what needs
+ * to be done to get it to work.
+ * Having an skbuff at 64K rather than 9K (to match etherhet 'jumbo' frames) doesn't really cost much memory;
+ * we are only likely to have a few MB of skbuffs in each IO node, and less in each compute node.
+ */
+enum {
+ k_use_plentiful_skb = 1 , /* Whether to use an oversized sk_buff to receive in to */
+ k_plentiful_skb_size = 256*COL_FRAGPAYLOAD
+};
+
+/* Pre-allocate 'count' oversized receive sk_buffs onto the given list.
+ * Allocation failures are tolerated silently: the pool simply ends up
+ * short, and replenish_list_for_filling() tops it up later. */
+static void bgcol_prefill(struct sk_buff_head * skb_list, unsigned int count)
+{
+ unsigned int remaining ;
+ for(remaining=count;remaining>0;remaining-=1)
+ {
+ struct sk_buff *fresh=alloc_skb(k_plentiful_skb_size,GFP_KERNEL) ;
+ if(fresh)
+ skb_queue_tail(skb_list,fresh) ;
+ }
+}
+
+/* Pull the next pre-allocated receive buffer from the pool.
+ * Returns NULL when the pool is empty (skb_dequeue semantics). */
+static struct sk_buff * take_skb_from_list_for_filling(struct bg_col *col)
+{
+ struct sk_buff *next_skb = skb_dequeue (&col->skb_list_for_filling) ;
+ return next_skb ;
+}
+
+/* Top the receive pool back up with one freshly allocated oversized buffer.
+ * An allocation failure just leaves the pool one buffer short. */
+static void replenish_list_for_filling(struct bg_col *col)
+{
+ struct sk_buff *fresh = alloc_skb(k_plentiful_skb_size, GFP_KERNEL) ;
+ if( fresh != NULL )
+ skb_queue_tail(&col->skb_list_for_filling, fresh) ;
+}
+/* int bgcol_diagnostic_use_napi ; */
+/*
+ * device management
+ */
+
+#define BGP_MAX_DEVICES 8
+static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES];
+/* static unsigned int bgpnet_num_devices = 0; */
+
+
+static struct proc_dir_entry* bgpnetDir;
+/* static struct proc_dir_entry* barrierEntry; */
+static struct proc_dir_entry* statisticsEntry;
+static struct proc_dir_entry* statusEntry;
+/* static struct proc_dir_entry* tracemaskEntry; */
+struct bg_col static_col;
+
+static struct bg_col *__bgcol = &static_col ;
+
+/* static int bgpnet_add_device(int major, int minor, const char* name, */
+/* unsigned long long base, int irq, */
+/* irqreturn_t (*irq_handler)(int, void*)); */
+/* static int bgpnet_device_open(struct inode *inode, struct file *filp); */
+/* static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *); */
+/* static int bgpnet_device_release(struct inode *inode, struct file * filp); */
+/* static int bgpnet_device_ioctl(struct inode *inode, struct file * filp, */
+/* unsigned int cmd, unsigned long arg); */
+/* static ssize_t bgpnet_device_read(struct file *filp, char __user *buf, size_t count, */
+/* loff_t *f_pos); */
+/* static unsigned int bgpnet_device_poll(struct file *file, poll_table * wait); */
+
+
+/* static struct file_operations bgpnet_device_fops = */
+/* { */
+/* .owner= THIS_MODULE, */
+/* .open= bgpnet_device_open, */
+/* .read= bgpnet_device_read, */
+/* .write= NULL, */
+/* .poll= bgpnet_device_poll, */
+/* .ioctl= bgpnet_device_ioctl, */
+/* .release= bgpnet_device_release, */
+/* .mmap= bgpnet_device_mmap, */
+/* }; */
+
+/* Accessor for the single, statically allocated collective-device state.
+ * Declared (void): the previous empty parameter list '()' means
+ * "unspecified arguments" in C and defeats prototype checking. */
+struct bg_col *bgcol_get_dev(void)
+{
+ return __bgcol;
+}
+
+/* Return this node's id on the collective network, as cached in 'col'. */
+unsigned int bgcol_get_nodeid(struct bg_col* col)
+{
+ unsigned int id = col->nodeid;
+ return id;
+}
+
+/**********************************************************************
+ * IRQs
+ **********************************************************************/
+
+/* static irqreturn_t bgcol_unhandled_interrupt(int irq, void *dev, struct pt_regs* regs) */
+/* { */
+/* panic("col: unhandled irq %d\n", irq); */
+/* } */
+
+static irqreturn_t bgcol_duplex_interrupt(int irq, void *dev);
+
+#define IRQ_IDX_INJECT 0
+#define IRQ_IDX_RECEIVE 1
+
+#define DEF_IRQ(_irq, _name, _handler) \
+{ .irq = _irq, .name = _name, .handler = _handler }
+
+#define BG_COL_IRQ_INJ 180
+#define BG_COL_IRQ_RCV 181
+
+#define BG_COL_IRQ_GROUP 5
+#define BG_COL_IRQ_INJ_GINT 20
+#define BG_COL_IRQ_RCV_GINT 21
+
+/* Linux 'virtual interrupt' numbers corresponding to how the collective is wired to the BIC */
+enum {
+ k_inject_irq = (5*32 + 20) + 32 ,
+ k_receive_irq = (5*32 + 21) + 32
+} ;
+
+/* Table of collective-network interrupt lines: Linux virtual IRQ number,
+ * human-readable name (used when requesting the IRQ), and handler.
+ * Both live entries share bgcol_duplex_interrupt.  The table is terminated
+ * by a sentinel entry; NOTE(review): -1 stored into the 'unsigned irq'
+ * field wraps to UINT_MAX — confirm the scan loop tests the irq field as
+ * signed (or tests 'name'/'handler' for NULL) rather than comparing
+ * against -1 as unsigned.
+ * The '#if 0' entries predate the current 3-argument DEF_IRQ macro (they
+ * pass only 2 arguments) and would not compile if re-enabled. */
+static struct {
+ unsigned irq;
+ char *name;
+ irqreturn_t (*handler)(int irq, void *dev);
+} bgcol_irqs [] = {
+ DEF_IRQ(k_inject_irq, "Tree inject", bgcol_duplex_interrupt), /* IRQ_IDX_INJECT */
+ DEF_IRQ(k_receive_irq, "Tree receive", bgcol_duplex_interrupt), /* IRQ_IDX_RECEIVE */
+#if 0
+ DEF_IRQ("Tree VC0", bgcol_receive_interrupt),
+ DEF_IRQ("Tree VC1", bgcol_receive_interrupt),
+ DEF_IRQ("Tree CRNI timeout", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree no-target", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree ALU overflow", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree local client inject", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree local client receive", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree write send CH0", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree ECC send CH0", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree link CRC send CH0", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree write send CH1", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree ECC send CH1", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree link CRC send CH1", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree write send CH2", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree ECC send CH2", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree link CRC send CH2", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree ECC rcv CH0", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree link CRC rcv CH0", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree ECC rcv CH1", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree link CRC rcv CH1", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree ECC rcv CH2", bgcol_unhandled_interrupt),
+ DEF_IRQ("Tree link CRC rcv CH2", bgcol_unhandled_interrupt),
+#endif
+ { -1,NULL, NULL }
+};
+
+
+/**********************************************************************
+ * Debug
+ **********************************************************************/
+
+/* Trace an sk_buff's identity and, when the k_t_detail trace bit is set,
+ * hex-dump its payload as 32-bit words, eight per output row. */
+static inline void dump_skb(struct sk_buff *skb)
+{
+ TRACEN(k_t_general,"sk_buff at %p, data=%p, len=%d", skb,skb->data, skb->len) ;
+#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+ if( bgcol_debug_tracemask & k_t_detail )
+ {
+ int i;
+ /* Round the byte length up to whole words. The previous bound,
+ * 'len / 4 + 1', read one word past the end of the data whenever
+ * len was an exact multiple of 4. */
+ for (i = 0; i < (skb->len + 3) / 4; i++)
+ printk("%08x%c", ((u32*)skb->data)[i], (i + 1) % 8 ? ' ' : '\n');
+ printk("\n");
+ }
+#endif
+}
+
+/* Like dump_skb(), but caps the hex dump at 'maxlength' bytes. */
+static inline void dump_skb_partial(struct sk_buff *skb, int maxlength)
+{
+ TRACEN(k_t_general,"sk_buff at %p, data=%p, len=%d", skb,skb->data, skb->len) ;
+#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+ if( bgcol_debug_tracemask & k_t_detail )
+ {
+ int j = (maxlength > skb->len) ? skb->len : maxlength ;
+ int i;
+ /* Round up to whole words; the previous bound 'j / 4 + 1'
+ * overran by one word whenever j was a multiple of 4. */
+ for (i = 0; i < (j + 3) / 4; i++)
+ printk("%08x%c", ((u32*)skb->data)[i], (i + 1) % 8 ? ' ' : '\n');
+ printk("\n");
+ }
+#endif
+}
+
+/* Trace one collective packet: always log the link-header fields; when the
+ * k_t_detail trace bit is set, additionally dump the whole
+ * COL_FRAGPAYLOAD-byte payload as rows of eight 32-bit words, each row
+ * prefixed with its byte offset. */
+static inline void dump_bgcol_packet(struct bglink_hdr_col *lnkhdr, void * payload)
+  {
+    TRACEN(k_t_general,"bgcol_packet: hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x",
+          lnkhdr->conn_id, lnkhdr->this_pkt, lnkhdr->total_pkt, lnkhdr->dst_key, lnkhdr->src_key);
+#if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+    if( bgcol_debug_tracemask & k_t_detail )
+	    {
+		    int i ;
+		    int * pi = (int *) payload ;
+		    /* 8 words per trace line; 'i' advances by 8, offset printed in bytes. */
+		    for( i=0; i<COL_FRAGPAYLOAD/sizeof(int); i += 8)
+			    {
+				    TRACEN(k_t_bgcolpkt," %04x %08x %08x %08x %08x %08x %08x %08x %08x",
+				           4*i, pi[i+0], pi[i+1], pi[i+2], pi[i+3], pi[i+4], pi[i+5], pi[i+6], pi[i+7]
+				           ) ;
+			    }
+	    }
+#endif
+  }
+
+/* Delivery of skbuffs to linux networking layer */
+/* Deliver an 'sk_buff' via a work queue, so that 'this' core can spend its time draining the collective hardware */
+/* Per-skb deferred-delivery record.  An instance of this struct is placed
+ * inside the sk_buff's 48-byte control block (skb->cb) by
+ * bgcol_deliver_via_workqueue(), so no separate allocation is needed;
+ * the actor recovers the owning skb from the work_struct's address. */
+struct bgcol_workqueue_item
+{
+	struct work_struct work ;  /* must stay first: its address == skb->cb */
+	struct bglink_proto *proto ;  /* protocol whose col_rcv_trimmed() will be called */
+	unsigned int src_key ;  /* sender's key, copied from the link header */
+};
+/* Workqueue callback: deliver one received skb to its protocol handler.
+ * 'work' is the work_struct embedded at the start of the
+ * bgcol_workqueue_item that lives inside skb->cb, so the owning sk_buff is
+ * recovered by subtracting offsetof(struct sk_buff, cb) from its address.
+ * After handing the skb upward (ownership passes to col_rcv_trimmed), a
+ * replacement buffer is allocated to keep the receive pool full. */
+static void bgcol_workqueue_actor(struct work_struct * work)
+{
+	char * cb = (char *) work ;
+	struct sk_buff *skb = (struct sk_buff *) (cb - offsetof(struct sk_buff, cb)) ;
+	struct bgcol_workqueue_item * bgcol_work =(struct bgcol_workqueue_item *) work ;
+	TRACEN(k_t_napi,"(>) work=%p skb=%p", work, skb) ;
+	bgcol_work->proto->col_rcv_trimmed(&static_col,skb,bgcol_work->proto,bgcol_work->src_key) ;
+	replenish_list_for_filling(&static_col) ;
+	TRACEN(k_t_napi,"(<)") ;
+}
+/* Hand a received skb to the networking layer via a work item scheduled on
+ * a fixed CPU (k_WorkqueueDeliveryCPU), so the interrupt-handling core can
+ * keep draining the collective hardware FIFOs.  The link header's padding
+ * fields are stripped here (pad_head from the front, pad_tail from the
+ * back) before queuing; the work item itself is built in-place inside
+ * skb->cb (see struct bgcol_workqueue_item). */
+static void bgcol_deliver_via_workqueue(struct sk_buff *skb, struct bglink_hdr_col *lnkhdr, struct bglink_proto *proto )
+{
+	struct bgcol_workqueue_item * bgcol_work = (struct bgcol_workqueue_item *)(skb->cb) ;
+	int rc ;
+	TRACEN(k_t_napi,"(>)skb=%p", skb) ;
+	__skb_pull(skb, lnkhdr->opt.opt_net.pad_head);
+	__skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail);
+	INIT_WORK(&bgcol_work->work,bgcol_workqueue_actor) ;
+	bgcol_work->proto = proto ;
+	bgcol_work->src_key = lnkhdr->src_key ;
+	/* rc is 1 if queued, 0 if the work item was already pending (trace only). */
+	rc=schedule_work_on(k_WorkqueueDeliveryCPU,&bgcol_work->work) ;
+	TRACEN(k_t_napi,"(<) rc=%d",rc) ;
+}
+/**********************************************************************
+ * Interrupt handling
+ **********************************************************************/
+
+/* Enable collective-network interrupts: program the global VC watermarks,
+ * enable all receive exception/watermark sources, and clear any latched
+ * exception flags, all under the device lock. */
+void bgcol_enable_interrupts(struct bg_col *bgcol)
+{
+	unsigned rec_enable;
+	unsigned long flags ;
+	TRACE( "(>) bgcol=%p", bgcol);
+	/* NOTE(review): looks like leftover bring-up debug output — consider removing. */
+	printk(KERN_NOTICE "enable ints \n");
+
+	spin_lock_irqsave(&bgcol->lock, flags);
+
+	/* Receive watermark 0 on both virtual channels. */
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG0, _TR_GLOB_VCFG_RWM(0) );
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG1, _TR_GLOB_VCFG_RWM(0) );
+	/* Inject watermark on VC0.
+	 * NOTE(review): this second write to VCFG0 replaces the RWM(0) value
+	 * written two lines above unless the register's fields merge in
+	 * hardware — confirm against the TR_GLOB_VCFG register definition. */
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_GLOB_VCFG0, _TR_GLOB_VCFG_IWM(4) ); /* let transmit fifos get half empty before interrupting */
+
+	/* OR the receive interrupt mask into the current enable set. */
+	rec_enable = mfdcrx(bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN);
+	rec_enable |= COL_IRQMASK_REC;
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, rec_enable );
+
+	/* Reading the exception-flag registers clears them (read-to-clear). */
+	mfdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXF );
+	mfdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXF );
+
+	spin_unlock_irqrestore(&bgcol->lock, flags);
+	TRACE( "(<) rec_enable:0x%08x", rec_enable);
+}
+
+/* Turn on the receive-side interrupts by writing the full receive
+ * interrupt mask to the PRXEN enable register (no read-modify-write;
+ * any previously enabled bits outside the mask are dropped). */
+static inline void bgcol_enable_interrupts_rcv(struct bg_col *bgcol)
+{
+	unsigned enable_mask = COL_IRQMASK_REC ;
+	TRACE( "(>) bgcol=%p", bgcol);
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, enable_mask );
+	TRACE( "(<) rec_enable:0x%08x", enable_mask);
+}
+
+/* Arm the transmit-side interrupt: base enable plus the watermark-0
+ * condition in the inject PIXEN register. */
+static inline void bgcol_enable_interrupts_xmit(struct bg_col *bgcol)
+{
+	unsigned inj_enable = _TR_INJ_PIX_ENABLE | _TR_INJ_PIX_WM0 ;
+	TRACE( "bgcol=%p", bgcol);
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, inj_enable );
+}
+
+
+/* Mask both directions: reduce the inject enables to the bare enable bit
+ * (dropping the watermark condition), then clear every receive enable. */
+static inline void bgcol_disable_interrupts(struct bg_col *bgcol)
+{
+	TRACEN(k_t_irqflow,"bgcol=%p", bgcol);
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, _TR_INJ_PIX_ENABLE );
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, 0 );
+}
+
+/* Mask all receive-side interrupt sources by zeroing the PRXEN register. */
+static inline void bgcol_disable_interrupts_rcv(struct bg_col *bgcol)
+{
+	unsigned cleared = 0 ;
+	TRACEN(k_t_irqflow,"bgcol=%p", bgcol);
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_REC_PRXEN, cleared );
+}
+
+/* Drop the transmit watermark interrupt condition, leaving only the
+ * base enable bit set in the inject PIXEN register. */
+static inline void bgcol_disable_interrupts_xmit(struct bg_col *bgcol)
+{
+	unsigned base_only = _TR_INJ_PIX_ENABLE ;
+	TRACEN(k_t_irqflow, "bgcol=%p", bgcol);
+	mtdcrx( bgcol->dcrbase + _BGP_DCR_TR_INJ_PIXEN, base_only );
+}
+/* Arm the receive watermark interrupt for one channel (idx 0 -> VC0 bit,
+ * nonzero -> VC1 bit).  Records the channel's pending mask and OR-s the
+ * watermark bit into the current PRXEN enables, all under the device lock. */
+void bgcol_enable_rcv_wm_interrupt(struct bgcol_channel* chn)
+{
+	unsigned long flags;
+	unsigned long prxen;
+	unsigned long wm_bit;
+
+	spin_lock_irqsave(&chn->col->lock, flags);
+	wm_bit = chn->idx ? _TR_REC_PRX_WM1 : _TR_REC_PRX_WM0;
+	chn->irq_rcv_pending_mask = COL_IRQ_RCV_PENDING_MASK(chn->idx);
+	prxen = mfdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN);
+	mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, prxen | wm_bit);
+	spin_unlock_irqrestore(&chn->col->lock, flags);
+}
+/* Timer callback fired when the inject FIFO fails to drain within
+ * FRAGMENT_TIMEOUT; it only logs the condition.  'colArg' is the timer's
+ * opaque argument (presumably the struct bg_col pointer — unused here). */
+static void inj_timeout(unsigned long colArg)
+{
+	printk(KERN_INFO "bgcol: inject fifo timed out!\n");
+}
+
+/* Record a new MTU: round it up to a whole number of collective fragment
+ * payloads, caching both the per-frame packet count and the resulting
+ * buffer size (with COL_SKB_ALIGN slack for alignment). */
+void bgcol_set_mtu(struct bg_col *bgcol, unsigned int mtu)
+  {
+    unsigned int frag_count = (mtu + COL_FRAGPAYLOAD - 1) / COL_FRAGPAYLOAD ;
+    bgcol->max_packets_per_frame = frag_count ;
+    bgcol->mtu = frag_count * COL_FRAGPAYLOAD + COL_SKB_ALIGN ;
+  }
+
+/* Inject one full collective packet: the 16-byte link header at
+ * 'first_quad' followed by the COL_FRAGPAYLOAD-byte payload at
+ * 'remaining_quads', written as sixteen quadword stores to the
+ * memory-mapped FIFO at 'port'.  Uses the BG/P paired-FPU ("double
+ * hummer") quad load/store instructions lfpdx/stfpdx, interleaving loads
+ * and stores so the FIFO port stays busy while the next quads are fetched.
+ * Both source pointers must be 16-byte aligned (see the commented-out
+ * BUG_ONs); clobbers fr0-fr9 and scratch index registers r3/r4. */
+static inline void bgcol_payload_inject(void *port, void* first_quad, void *remaining_quads)
+  {
+/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+        "lfpdx 0,0,%[first_quad] \n\t"  /* F0=Q0 load */
+        "li 3,16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[remaining_quads] \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+        "li 4,32 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q2 load */
+        "li 3,48 \n\t"  /* Indexing values */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q3 load */
+        "li 4,64 \n\t"  /* Indexing values */
+        "stfpdx 0,0,%[port] \n\t"  /* Q0 store to TR0_DI */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q4 load */
+        "li 3,80 \n\t"  /* Indexing values */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q5 load */
+        "li 4,96 \n\t"  /* Indexing values */
+        "lfpdx 6,3,%[remaining_quads] \n\t"  /* F6=Q6 load */
+        "li 3,112 \n\t"  /* Indexing values */
+        "stfpdx 1,0,%[port] \n\t"  /* Q1 store */
+        "stfpdx 2,0,%[port] \n\t"  /* Q2 store */
+        "stfpdx 3,0,%[port] \n\t"  /* Q3 store */
+        "lfpdx 7,4,%[remaining_quads] \n\t"  /* F7=Q7 load */
+        "li 4,128 \n\t"  /* Indexing values */
+        "lfpdx 8,3,%[remaining_quads] \n\t"  /* F8=Q8 load */
+        "li 3,144 \n\t"  /* Indexing values */
+        "lfpdx 9,4,%[remaining_quads] \n\t"  /* F9=Q9 load */
+        "li 4,160 \n\t"  /* Indexing values */
+        "stfpdx 4,0,%[port] \n\t"  /* Q4 store */
+        "stfpdx 5,0,%[port] \n\t"  /* Q5 store */
+        "stfpdx 6,0,%[port] \n\t"  /* Q6 store */
+        "lfpdx 0,3,%[remaining_quads] \n\t"  /* F0=Q10 load */
+        "li 3,176 \n\t"  /* Indexing values */
+        "lfpdx 1,4,%[remaining_quads] \n\t"  /* F1=Q11 load */
+        "li 4,192 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q12 load */
+        "li 3,208 \n\t"  /* Indexing values */
+        "stfpdx 7,0,%[port] \n\t"  /* Q7 store */
+        "stfpdx 8,0,%[port] \n\t"  /* Q8 store */
+        "stfpdx 9,0,%[port] \n\t"  /* Q9 store */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q13 load */
+        "li 4,224 \n\t"  /* Indexing values */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q14 load */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q15 load */
+        "stfpdx 0,0,%[port] \n\t"  /* Q10 store */
+        "stfpdx 1,0,%[port] \n\t"  /* Q11 store */
+        "stfpdx 2,0,%[port] \n\t"  /* Q12 store */
+        "stfpdx 3,0,%[port] \n\t"  /* Q13 store */
+        "stfpdx 4,0,%[port] \n\t"  /* Q14 store */
+        "stfpdx 5,0,%[port] \n\t"  /* Q15 store */
+        :
+        : [first_quad] "b" (first_quad) ,  /* Inputs */
+          [remaining_quads] "b" (remaining_quads),
+          [port] "b" (port)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9",
+          "r3" , "r4" );
+  }
+/* Variant of bgcol_payload_inject() for a link header that is split across
+ * two 8-byte halves: 'first_quad_0' is loaded into F0's primary register
+ * (lfdx) and 'first_quad_1' into its secondary (lfsdx), so the two halves
+ * are injected together as quad Q0.  The COL_FRAGPAYLOAD payload at
+ * 'remaining_quads' then follows as Q1..Q15, interleaving quad loads with
+ * stores to the FIFO port.  Payload pointer must be 16-byte aligned;
+ * clobbers fr0-fr9, r3, r4. */
+static inline void bgcol_payload_inject2(void *port, double* first_quad_0, double* first_quad_1, void *remaining_quads)
+  {
+/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+        "lfdx 0,0,%[first_quad_0] \n\t"  /* F0 primary = first header half */
+        "lfsdx 0,0,%[first_quad_1] \n\t"  /* F0 secondary = second header half */
+        "li 3,16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[remaining_quads] \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+        "li 4,32 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q2 load */
+        "li 3,48 \n\t"  /* Indexing values */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q3 load */
+        "li 4,64 \n\t"  /* Indexing values */
+        "stfpdx 0,0,%[port] \n\t"  /* Q0 store to TR0_DI */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q4 load */
+        "li 3,80 \n\t"  /* Indexing values */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q5 load */
+        "li 4,96 \n\t"  /* Indexing values */
+        "lfpdx 6,3,%[remaining_quads] \n\t"  /* F6=Q6 load */
+        "li 3,112 \n\t"  /* Indexing values */
+        "stfpdx 1,0,%[port] \n\t"  /* Q1 store */
+        "stfpdx 2,0,%[port] \n\t"  /* Q2 store */
+        "stfpdx 3,0,%[port] \n\t"  /* Q3 store */
+        "lfpdx 7,4,%[remaining_quads] \n\t"  /* F7=Q7 load */
+        "li 4,128 \n\t"  /* Indexing values */
+        "lfpdx 8,3,%[remaining_quads] \n\t"  /* F8=Q8 load */
+        "li 3,144 \n\t"  /* Indexing values */
+        "lfpdx 9,4,%[remaining_quads] \n\t"  /* F9=Q9 load */
+        "li 4,160 \n\t"  /* Indexing values */
+        "stfpdx 4,0,%[port] \n\t"  /* Q4 store */
+        "stfpdx 5,0,%[port] \n\t"  /* Q5 store */
+        "stfpdx 6,0,%[port] \n\t"  /* Q6 store */
+        "lfpdx 0,3,%[remaining_quads] \n\t"  /* F0=Q10 load */
+        "li 3,176 \n\t"  /* Indexing values */
+        "lfpdx 1,4,%[remaining_quads] \n\t"  /* F1=Q11 load */
+        "li 4,192 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q12 load */
+        "li 3,208 \n\t"  /* Indexing values */
+        "stfpdx 7,0,%[port] \n\t"  /* Q7 store */
+        "stfpdx 8,0,%[port] \n\t"  /* Q8 store */
+        "stfpdx 9,0,%[port] \n\t"  /* Q9 store */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q13 load */
+        "li 4,224 \n\t"  /* Indexing values */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q14 load */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q15 load */
+        "stfpdx 0,0,%[port] \n\t"  /* Q10 store */
+        "stfpdx 1,0,%[port] \n\t"  /* Q11 store */
+        "stfpdx 2,0,%[port] \n\t"  /* Q12 store */
+        "stfpdx 3,0,%[port] \n\t"  /* Q13 store */
+        "stfpdx 4,0,%[port] \n\t"  /* Q14 store */
+        "stfpdx 5,0,%[port] \n\t"  /* Q15 store */
+        :
+        : [first_quad_0] "b" (first_quad_0) ,  /* Inputs */
+          [first_quad_1] "b" (first_quad_1) ,  /* Inputs */
+          [remaining_quads] "b" (remaining_quads),
+          [port] "b" (port)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9",
+          "r3" , "r4" );
+  }
+/* Load a full packet (header quad Q0 plus payload quads Q1..Q15) from
+ * memory into FPU registers fr0..fr15, one quad per register, without
+ * storing anything.  Pairs with bgcol_payload_inject_store(), which
+ * streams fr0..fr15 to the FIFO; the compiler must not be allowed to
+ * schedule FP code between the two (the registers are live across the asm
+ * boundary — hence 'volatile' and the full clobber list).  Pointers must
+ * be 16-byte aligned. */
+static inline void bgcol_payload_inject_load(void* first_quad, void *remaining_quads)
+  {
+/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+        "lfpdx 0,0,%[first_quad] \n\t"  /* F0=Q0 load */
+        "li 3,16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[remaining_quads] \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+        "li 4,32 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q2 load */
+        "li 3,48 \n\t"  /* Indexing values */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q3 load */
+        "li 4,64 \n\t"  /* Indexing values */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q4 load */
+        "li 3,80 \n\t"  /* Indexing values */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q5 load */
+        "li 4,96 \n\t"  /* Indexing values */
+        "lfpdx 6,3,%[remaining_quads] \n\t"  /* F6=Q6 load */
+        "li 3,112 \n\t"  /* Indexing values */
+        "lfpdx 7,4,%[remaining_quads] \n\t"  /* F7=Q7 load */
+        "li 4,128 \n\t"  /* Indexing values */
+        "lfpdx 8,3,%[remaining_quads] \n\t"  /* F8=Q8 load */
+        "li 3,144 \n\t"  /* Indexing values */
+        "lfpdx 9,4,%[remaining_quads] \n\t"  /* F9=Q9 load */
+        "li 4,160 \n\t"  /* Indexing values */
+        "lfpdx 10,3,%[remaining_quads] \n\t"  /* F10=Q10 load */
+        "li 3,176 \n\t"  /* Indexing values */
+        "lfpdx 11,4,%[remaining_quads] \n\t"  /* F11=Q11 load */
+        "li 4,192 \n\t"  /* Indexing values */
+        "lfpdx 12,3,%[remaining_quads] \n\t"  /* F12=Q12 load */
+        "li 3,208 \n\t"  /* Indexing values */
+        "lfpdx 13,4,%[remaining_quads] \n\t"  /* F13=Q13 load */
+        "li 4,224 \n\t"  /* Indexing values */
+        "lfpdx 14,3,%[remaining_quads] \n\t"  /* F14=Q14 load */
+        "lfpdx 15,4,%[remaining_quads] \n\t"  /* F15=Q15 load */
+        :
+        : [first_quad] "b" (first_quad) ,  /* Inputs */
+          [remaining_quads] "b" (remaining_quads)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9", "fr10", "fr11",
+          "fr12","fr13", "fr14",
+          "fr15","r3" , "r4" );
+  }
+/* Like bgcol_payload_inject_load(), but the header quad Q0 is assembled
+ * from two 8-byte halves: 'first_quad_0' into F0's primary register (lfdx)
+ * and 'first_quad_1' into its secondary (lfsdx).  Payload quads Q1..Q15
+ * land in fr1..fr15; a later bgcol_payload_inject_store() streams them
+ * out.  Payload pointer must be 16-byte aligned. */
+static inline void bgcol_payload_inject_load2(double* first_quad_0, double* first_quad_1, void *remaining_quads)
+  {
+/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+        "lfdx 0,0,%[first_quad_0] \n\t"  /* F0 primary = first header half */
+        "lfsdx 0,0,%[first_quad_1] \n\t"  /* F0 secondary = second header half */
+        "li 3,16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[remaining_quads] \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+        "li 4,32 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q2 load */
+        "li 3,48 \n\t"  /* Indexing values */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q3 load */
+        "li 4,64 \n\t"  /* Indexing values */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q4 load */
+        "li 3,80 \n\t"  /* Indexing values */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q5 load */
+        "li 4,96 \n\t"  /* Indexing values */
+        "lfpdx 6,3,%[remaining_quads] \n\t"  /* F6=Q6 load */
+        "li 3,112 \n\t"  /* Indexing values */
+        "lfpdx 7,4,%[remaining_quads] \n\t"  /* F7=Q7 load */
+        "li 4,128 \n\t"  /* Indexing values */
+        "lfpdx 8,3,%[remaining_quads] \n\t"  /* F8=Q8 load */
+        "li 3,144 \n\t"  /* Indexing values */
+        "lfpdx 9,4,%[remaining_quads] \n\t"  /* F9=Q9 load */
+        "li 4,160 \n\t"  /* Indexing values */
+        "lfpdx 10,3,%[remaining_quads] \n\t"  /* F10=Q10 load */
+        "li 3,176 \n\t"  /* Indexing values */
+        "lfpdx 11,4,%[remaining_quads] \n\t"  /* F11=Q11 load */
+        "li 4,192 \n\t"  /* Indexing values */
+        "lfpdx 12,3,%[remaining_quads] \n\t"  /* F12=Q12 load */
+        "li 3,208 \n\t"  /* Indexing values */
+        "lfpdx 13,4,%[remaining_quads] \n\t"  /* F13=Q13 load */
+        "li 4,224 \n\t"  /* Indexing values */
+        "lfpdx 14,3,%[remaining_quads] \n\t"  /* F14=Q14 load */
+        "lfpdx 15,4,%[remaining_quads] \n\t"  /* F15=Q15 load */
+        :
+        : [first_quad_0] "b" (first_quad_0) ,  /* Inputs */
+          [first_quad_1] "b" (first_quad_1) ,  /* Inputs */
+          [remaining_quads] "b" (remaining_quads)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9", "fr10", "fr11",
+          "fr12","fr13", "fr14",
+          "fr15","r3" , "r4" );
+  }
+/* Like bgcol_payload_inject_load2(), but stops after loading 'quadcount'
+ * payload quads: CTR is preloaded with quadcount and each 'bdz 1f'
+ * decrements it, jumping to the local label '1:' when it reaches zero.
+ * Registers beyond the loaded count keep stale contents — presumably the
+ * caller pads the injected packet; confirm at the call site.
+ * Fixes relative to the original:
+ *  - the branch target was written 'bdz 1' — GNU as requires the 'f'
+ *    suffix ('1f') to reference a forward local label; a bare '1' is an
+ *    absolute address, not the label below;
+ *  - 'mtctr' modifies the count register, so "ctr" is now declared in the
+ *    clobber list. */
+static inline void bgcol_payload_inject_load2partial(double* first_quad_0, double* first_quad_1, void *remaining_quads, int quadcount )
+  {
+/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+        "mtctr %[quadcount] \n\t"
+        "lfdx 0,0,%[first_quad_0] \n\t"  /* F0 primary = first header half */
+        "lfsdx 0,0,%[first_quad_1] \n\t"  /* F0 secondary = second header half */
+        "li 3,16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[remaining_quads] \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 4,32 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q2 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 3,48 \n\t"  /* Indexing values */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q3 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 4,64 \n\t"  /* Indexing values */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q4 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 3,80 \n\t"  /* Indexing values */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q5 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 4,96 \n\t"  /* Indexing values */
+        "lfpdx 6,3,%[remaining_quads] \n\t"  /* F6=Q6 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 3,112 \n\t"  /* Indexing values */
+        "lfpdx 7,4,%[remaining_quads] \n\t"  /* F7=Q7 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 4,128 \n\t"  /* Indexing values */
+        "lfpdx 8,3,%[remaining_quads] \n\t"  /* F8=Q8 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 3,144 \n\t"  /* Indexing values */
+        "lfpdx 9,4,%[remaining_quads] \n\t"  /* F9=Q9 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 4,160 \n\t"  /* Indexing values */
+        "lfpdx 10,3,%[remaining_quads] \n\t"  /* F10=Q10 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 3,176 \n\t"  /* Indexing values */
+        "lfpdx 11,4,%[remaining_quads] \n\t"  /* F11=Q11 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 4,192 \n\t"  /* Indexing values */
+        "lfpdx 12,3,%[remaining_quads] \n\t"  /* F12=Q12 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 3,208 \n\t"  /* Indexing values */
+        "lfpdx 13,4,%[remaining_quads] \n\t"  /* F13=Q13 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "li 4,224 \n\t"  /* Indexing values */
+        "lfpdx 14,3,%[remaining_quads] \n\t"  /* F14=Q14 load */
+        "bdz 1f \n\t"  /* Skip out if done */
+        "lfpdx 15,4,%[remaining_quads] \n"  /* F15=Q15 load */
+        "1: \n\t"  /* Jump-out label */
+        :
+        : [first_quad_0] "b" (first_quad_0) ,  /* Inputs */
+          [first_quad_1] "b" (first_quad_1) ,  /* Inputs */
+          [remaining_quads] "b" (remaining_quads) ,
+          [quadcount] "r" (quadcount)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9", "fr10", "fr11",
+          "fr12","fr13", "fr14",
+          "fr15","r3" , "r4" , "ctr" );
+  }
+/* Pipelined inject: streams the PREVIOUS packet (already sitting in
+ * fr0..fr15, e.g. from bgcol_payload_inject_load()) out to the FIFO while
+ * simultaneously loading the NEXT packet (header quad at 'first_quad',
+ * payload at 'remaining_quads') into the same registers.  Each stfpdx of
+ * register N is issued before N is overwritten by its lfpdx.  Pointers
+ * must be 16-byte aligned; clobbers fr0-fr15, r3, r4. */
+static inline void bgcol_payload_inject_storeload(void *port, void* first_quad, void *remaining_quads)
+  {
+/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+        "stfpdx 0,0,%[port] \n\t"  /* Q0 store to TR0_DI */
+        "lfpdx 0,0,%[first_quad] \n\t"  /* F0=Q0 load */
+        "stfpdx 1,0,%[port] \n\t"  /* Q1 store */
+        "li 3,16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[remaining_quads] \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+        "stfpdx 2,0,%[port] \n\t"  /* Q2 store */
+        "li 4,32 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q2 load */
+        "stfpdx 3,0,%[port] \n\t"  /* Q3 store */
+        "li 3,48 \n\t"  /* Indexing values */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q3 load */
+        "stfpdx 4,0,%[port] \n\t"  /* Q4 store */
+        "li 4,64 \n\t"  /* Indexing values */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q4 load */
+        "stfpdx 5,0,%[port] \n\t"  /* Q5 store */
+        "li 3,80 \n\t"  /* Indexing values */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q5 load */
+        "stfpdx 6,0,%[port] \n\t"  /* Q6 store */
+        "li 4,96 \n\t"  /* Indexing values */
+        "lfpdx 6,3,%[remaining_quads] \n\t"  /* F6=Q6 load */
+        "stfpdx 7,0,%[port] \n\t"  /* Q7 store */
+        "li 3,112 \n\t"  /* Indexing values */
+        "lfpdx 7,4,%[remaining_quads] \n\t"  /* F7=Q7 load */
+        "stfpdx 8,0,%[port] \n\t"  /* Q8 store */
+        "li 4,128 \n\t"  /* Indexing values */
+        "lfpdx 8,3,%[remaining_quads] \n\t"  /* F8=Q8 load */
+        "stfpdx 9,0,%[port] \n\t"  /* Q9 store */
+        "li 3,144 \n\t"  /* Indexing values */
+        "lfpdx 9,4,%[remaining_quads] \n\t"  /* F9=Q9 load */
+        "stfpdx 10,0,%[port] \n\t"  /* Q10 store */
+        "li 4,160 \n\t"  /* Indexing values */
+        "lfpdx 10,3,%[remaining_quads] \n\t"  /* F10=Q10 load */
+        "stfpdx 11,0,%[port] \n\t"  /* Q11 store */
+        "li 3,176 \n\t"  /* Indexing values */
+        "lfpdx 11,4,%[remaining_quads] \n\t"  /* F11=Q11 load */
+        "stfpdx 12,0,%[port] \n\t"  /* Q12 store */
+        "li 4,192 \n\t"  /* Indexing values */
+        "lfpdx 12,3,%[remaining_quads] \n\t"  /* F12=Q12 load */
+        "stfpdx 13,0,%[port] \n\t"  /* Q13 store */
+        "li 3,208 \n\t"  /* Indexing values */
+        "lfpdx 13,4,%[remaining_quads] \n\t"  /* F13=Q13 load */
+        "stfpdx 14,0,%[port] \n\t"  /* Q14 store */
+        "li 4,224 \n\t"  /* Indexing values */
+        "lfpdx 14,3,%[remaining_quads] \n\t"  /* F14=Q14 load */
+        "stfpdx 15,0,%[port] \n\t"  /* Q15 store */
+        "lfpdx 15,4,%[remaining_quads] \n\t"  /* F15=Q15 load */
+        :
+        : [first_quad] "b" (first_quad) ,  /* Inputs */
+          [remaining_quads] "b" (remaining_quads),
+          [port] "b" (port)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9", "fr10", "fr11",
+          "fr12","fr13", "fr14",
+          "fr15","r3" , "r4" );
+  }
+/* Pipelined inject, split-header variant: streams the previous packet from
+ * fr0..fr15 to the FIFO while loading the next one, whose header quad Q0
+ * is assembled from two 8-byte halves ('first_quad_0' into F0 primary via
+ * lfdx, 'first_quad_1' into F0 secondary via lfsdx).  Each store of
+ * register N precedes the load that overwrites N.  Payload pointer must be
+ * 16-byte aligned; clobbers fr0-fr15, r3, r4. */
+static inline void bgcol_payload_inject_storeload2(void *port, double* first_quad_0, double* first_quad_1, void *remaining_quads)
+  {
+/* BUG_ON((((int)first_quad) & 0xf) != 0) ; */
+/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+        "stfpdx 0,0,%[port] \n\t"  /* Q0 store to TR0_DI */
+        "lfdx 0,0,%[first_quad_0] \n\t"  /* F0 primary = first header half */
+        "lfsdx 0,0,%[first_quad_1] \n\t"  /* F0 secondary = second header half */
+        "stfpdx 1,0,%[port] \n\t"  /* Q1 store */
+        "li 3,16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[remaining_quads] \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+        "stfpdx 2,0,%[port] \n\t"  /* Q2 store */
+        "li 4,32 \n\t"  /* Indexing values */
+        "lfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q2 load */
+        "stfpdx 3,0,%[port] \n\t"  /* Q3 store */
+        "li 3,48 \n\t"  /* Indexing values */
+        "lfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q3 load */
+        "stfpdx 4,0,%[port] \n\t"  /* Q4 store */
+        "li 4,64 \n\t"  /* Indexing values */
+        "lfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q4 load */
+        "stfpdx 5,0,%[port] \n\t"  /* Q5 store */
+        "li 3,80 \n\t"  /* Indexing values */
+        "lfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q5 load */
+        "stfpdx 6,0,%[port] \n\t"  /* Q6 store */
+        "li 4,96 \n\t"  /* Indexing values */
+        "lfpdx 6,3,%[remaining_quads] \n\t"  /* F6=Q6 load */
+        "stfpdx 7,0,%[port] \n\t"  /* Q7 store */
+        "li 3,112 \n\t"  /* Indexing values */
+        "lfpdx 7,4,%[remaining_quads] \n\t"  /* F7=Q7 load */
+        "stfpdx 8,0,%[port] \n\t"  /* Q8 store */
+        "li 4,128 \n\t"  /* Indexing values */
+        "lfpdx 8,3,%[remaining_quads] \n\t"  /* F8=Q8 load */
+        "stfpdx 9,0,%[port] \n\t"  /* Q9 store */
+        "li 3,144 \n\t"  /* Indexing values */
+        "lfpdx 9,4,%[remaining_quads] \n\t"  /* F9=Q9 load */
+        "stfpdx 10,0,%[port] \n\t"  /* Q10 store */
+        "li 4,160 \n\t"  /* Indexing values */
+        "lfpdx 10,3,%[remaining_quads] \n\t"  /* F10=Q10 load */
+        "stfpdx 11,0,%[port] \n\t"  /* Q11 store */
+        "li 3,176 \n\t"  /* Indexing values */
+        "lfpdx 11,4,%[remaining_quads] \n\t"  /* F11=Q11 load */
+        "stfpdx 12,0,%[port] \n\t"  /* Q12 store */
+        "li 4,192 \n\t"  /* Indexing values */
+        "lfpdx 12,3,%[remaining_quads] \n\t"  /* F12=Q12 load */
+        "stfpdx 13,0,%[port] \n\t"  /* Q13 store */
+        "li 3,208 \n\t"  /* Indexing values */
+        "lfpdx 13,4,%[remaining_quads] \n\t"  /* F13=Q13 load */
+        "stfpdx 14,0,%[port] \n\t"  /* Q14 store */
+        "li 4,224 \n\t"  /* Indexing values */
+        "lfpdx 14,3,%[remaining_quads] \n\t"  /* F14=Q14 load */
+        "stfpdx 15,0,%[port] \n\t"  /* Q15 store */
+        "lfpdx 15,4,%[remaining_quads] \n\t"  /* F15=Q15 load */
+        :
+        : [first_quad_0] "b" (first_quad_0) ,  /* Inputs */
+          [first_quad_1] "b" (first_quad_1) ,  /* Inputs */
+          [remaining_quads] "b" (remaining_quads),
+          [port] "b" (port)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9", "fr10", "fr11",
+          "fr12","fr13", "fr14",
+          "fr15","r3" , "r4" );
+  }
+/* Stream the packet currently held in fr0..fr15 (placed there by one of
+ * the bgcol_payload_inject_load* routines) into the FIFO port as sixteen
+ * quadword stores.  The register contents are an implicit input carried
+ * across the asm boundary — the preceding load must not be separated from
+ * this call by any FP code. */
+static inline void bgcol_payload_inject_store(void *port)
+  {
+    asm volatile(
+        "stfpdx 0,0,%[port] \n\t"  /* Q0 store to TR0_DI */
+        "stfpdx 1,0,%[port] \n\t"  /* Q1 store */
+        "stfpdx 2,0,%[port] \n\t"  /* Q2 store */
+        "stfpdx 3,0,%[port] \n\t"  /* Q3 store */
+        "stfpdx 4,0,%[port] \n\t"  /* Q4 store */
+        "stfpdx 5,0,%[port] \n\t"  /* Q5 store */
+        "stfpdx 6,0,%[port] \n\t"  /* Q6 store */
+        "stfpdx 7,0,%[port] \n\t"  /* Q7 store */
+        "stfpdx 8,0,%[port] \n\t"  /* Q8 store */
+        "stfpdx 9,0,%[port] \n\t"  /* Q9 store */
+        "stfpdx 10,0,%[port] \n\t"  /* Q10 store */
+        "stfpdx 11,0,%[port] \n\t"  /* Q11 store */
+        "stfpdx 12,0,%[port] \n\t"  /* Q12 store */
+        "stfpdx 13,0,%[port] \n\t"  /* Q13 store */
+        "stfpdx 14,0,%[port] \n\t"  /* Q14 store */
+        "stfpdx 15,0,%[port] \n\t"  /* Q15 store */
+        :
+        : /* inputs */
+          [port] "b" (port)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9", "fr10", "fr11",
+          "fr12","fr13", "fr14",
+          "fr15" );
+  }
+
+/* Drain one packet payload from the receive FIFO: reads fifteen quadwords
+ * (240 bytes = COL_FRAGPAYLOAD) from 'port' and stores them to the
+ * 16-byte-aligned buffer at 'remaining_quads', recycling fr0..fr5 once
+ * their earlier quads have been stored so only fr0-fr9 are needed.
+ * NB: the inline comments were inherited from the inject routines, so
+ * lfpdx lines (FIFO reads) are labelled "store" and stfpdx lines (memory
+ * writes) are labelled "load" — read them as quad indices only. */
+static inline void bgcol_payload_receive240(void *port, void *remaining_quads)
+  {
+/* BUG_ON((((int)remaining_quads) & 0xf) != 0) ; */
+    asm volatile(
+        "lfpdx 1,0,%[port] \n\t"  /* Q1 store */
+        "lfpdx 2,0,%[port] \n\t"  /* Q2 store */
+        "lfpdx 3,0,%[port] \n\t"  /* Q3 store */
+        "lfpdx 4,0,%[port] \n\t"  /* Q4 store */
+        "lfpdx 5,0,%[port] \n\t"  /* Q5 store */
+        "lfpdx 6,0,%[port] \n\t"  /* Q6 store */
+        "lfpdx 7,0,%[port] \n\t"  /* Q7 store */
+        "lfpdx 8,0,%[port] \n\t"  /* Q8 store */
+        "lfpdx 9,0,%[port] \n\t"  /* Q9 store */
+        "lfpdx 0,0,%[port] \n\t"  /* Q10 store */
+        "li 3,16 \n\t"  /* Indexing values */
+        "stfpdx 1,0,%[remaining_quads] \n\t"  /* F1=Q1 load from (%[remaining_quads]) */
+        "li 4,32 \n\t"  /* Indexing values */
+        "stfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q2 load */
+        "lfpdx 1,0,%[port] \n\t"  /* Q11 store */
+        "li 3,48 \n\t"  /* Indexing values */
+        "stfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q3 load */
+        "lfpdx 2,0,%[port] \n\t"  /* Q12 store */
+        "li 4,64 \n\t"  /* Indexing values */
+        "stfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q4 load */
+        "lfpdx 3,0,%[port] \n\t"  /* Q13 store */
+        "li 3,80 \n\t"  /* Indexing values */
+        "stfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q5 load */
+        "lfpdx 4,0,%[port] \n\t"  /* Q14 store */
+        "li 4,96 \n\t"  /* Indexing values */
+        "stfpdx 6,3,%[remaining_quads] \n\t"  /* F6=Q6 load */
+        "lfpdx 5,0,%[port] \n\t"  /* Q15 store */
+        "li 3,112 \n\t"  /* Indexing values */
+        "stfpdx 7,4,%[remaining_quads] \n\t"  /* F7=Q7 load */
+        "li 4,128 \n\t"  /* Indexing values */
+        "stfpdx 8,3,%[remaining_quads] \n\t"  /* F8=Q8 load */
+        "li 3,144 \n\t"  /* Indexing values */
+        "stfpdx 9,4,%[remaining_quads] \n\t"  /* F9=Q9 load */
+        "li 4,160 \n\t"  /* Indexing values */
+        "stfpdx 0,3,%[remaining_quads] \n\t"  /* F0=Q10 load */
+        "li 3,176 \n\t"  /* Indexing values */
+        "stfpdx 1,4,%[remaining_quads] \n\t"  /* F1=Q11 load */
+        "li 4,192 \n\t"  /* Indexing values */
+        "stfpdx 2,3,%[remaining_quads] \n\t"  /* F2=Q12 load */
+        "li 3,208 \n\t"  /* Indexing values */
+        "stfpdx 3,4,%[remaining_quads] \n\t"  /* F3=Q13 load */
+        "li 4,224 \n\t"  /* Indexing values */
+        "stfpdx 4,3,%[remaining_quads] \n\t"  /* F4=Q14 load */
+        "stfpdx 5,4,%[remaining_quads] \n\t"  /* F5=Q15 load */
+        :
+        : /* Inputs */
+          [remaining_quads] "b" (remaining_quads),
+          [port] "b" (port)
+        : "fr0", "fr1", "fr2",  /* Clobbers */
+          "fr3", "fr4", "fr5",
+          "fr6", "fr7", "fr8",
+          "fr9", "r3" , "r4" );
+  }
+
+
+/* Load a full bgcol payload into 16 parallel floating point registers */
+/* Caution ... the compiler doesn't know that we want the regs later on */
+/* Reads one 16-byte link-header quad plus 15 payload quads from the FIFO
+ * port.  Only the header is stored to memory (at 'lnkhdr'); Q1..Q15 are
+ * deliberately left behind in fr1..fr15 for a later bgcol_payload_store()
+ * or bgcol_payload_storeload() call.  Returns the word at lnkhdr+4 (the
+ * sender's source key).  The read of *destport triggers the hardware to
+ * present the next packet.
+ * NOTE(review): the fr0-fr15 clobbers declare the registers dead after
+ * this asm, so any compiler-generated FP use between this call and the
+ * matching store would destroy the payload -- presumably callers are
+ * arranged so no FP code intervenes; confirm. */
+static inline unsigned int bgcol_payload_load(
+ void *port, /* The FIFO port */
+ void *lnkhdr, /* Where to put the first 16 bytes of the payload */
+ void *destport /* Which address to tap to ask for the next packet */
+ )
+ {
+ unsigned int src_key ;
+ unsigned int dummy ;
+ struct { unsigned char c [16] ; } *lnkhdrc = lnkhdr ;
+/* BUG_ON((((int)lnkhdr) & 0xf) != 0) ; */
+
+ asm (
+ "lfpdx 0,0,%[port] \n\t" /* lnkhdr */
+ "lfpdx 1,0,%[port] \n\t" /* Q1 store */
+ "lfpdx 2,0,%[port] \n\t" /* Q2 store */
+ "lfpdx 3,0,%[port] \n\t" /* Q3 store */
+ "lfpdx 4,0,%[port] \n\t" /* Q4 store */
+ "lfpdx 5,0,%[port] \n\t" /* Q5 store */
+ "lfpdx 6,0,%[port] \n\t" /* Q6 store */
+ "stfpdx 0,0,%[lnkhdr] \n\t"
+ "lfpdx 7,0,%[port] \n\t" /* Q7 store */
+ "lfpdx 8,0,%[port] \n\t" /* Q8 store */
+ "lfpdx 9,0,%[port] \n\t" /* Q9 store */
+ "lfpdx 10,0,%[port] \n\t" /* Q10 store */
+ "lfpdx 11,0,%[port] \n\t" /* Q11 store */
+ "lfpdx 12,0,%[port] \n\t" /* Q12 store */
+ "lwz %[src_key],4(%[lnkhdr]) \n\t"
+ "lfpdx 13,0,%[port] \n\t" /* Q13 store */
+ "lfpdx 14,0,%[port] \n\t" /* Q14 store */
+ "lfpdx 15,0,%[port] \n\t" /* Q15 store */
+ "lwz %[dummy],0(%[destport]) \n\t" /* trigger to pull the next packet in */
+ : /* outputs */
+ [dummy] "=r" (dummy),
+ [src_key] "=b" (src_key),
+ "=m" (*lnkhdrc)
+ : /* Inputs */
+ [port] "b" (port) ,
+ [lnkhdr] "b" (lnkhdrc) ,
+ [destport] "b" (destport)
+ : "fr0", "fr1", "fr2", /* Clobbers */
+ "fr3", "fr4", "fr5",
+ "fr6", "fr7", "fr8",
+ "fr9", "fr10", "fr11",
+ "fr12","fr13", "fr14",
+ "fr15"
+ );
+ TRACEN(k_t_fifocontents, "bgcol_payload_load src_key=%08x",src_key) ;
+ return src_key ;
+ }
+
+/* Variant of bgcol_payload_load() that splits the 16-byte link header
+ * across two 8-byte destinations: stfdx writes one half of the fr0 pair to
+ * 'lnkhdr0' and stfsdx writes the other half to 'lnkhdr1' (presumably the
+ * primary and secondary halves of the paired register -- confirm against
+ * the 450 FPU documentation).  Q1..Q15 remain in fr1..fr15 for a
+ * subsequent store; see the caution on bgcol_payload_load().  Returns the
+ * source-key word read back from lnkhdr0+4. */
+static inline unsigned int bgcol_payload_load2(
+ void *port, /* The FIFO port */
+ double *lnkhdr0, /* Where to put the first 8 bytes of the payload */
+ double *lnkhdr1, /* Where to put the second 8 bytes of the payload */
+ void *destport /* Which address to tap to ask for the next packet */
+ )
+ {
+ unsigned int src_key ;
+ unsigned int dummy ;
+/* BUG_ON((((int)lnkhdr0) & 0x07) != 0) ; */
+/* BUG_ON((((int)lnkhdr1) & 0x07) != 0) ; */
+
+ asm (
+ "lfpdx 0,0,%[port] \n\t" /* lnkhdr */
+ "lfpdx 1,0,%[port] \n\t" /* Q1 store */
+ "lfpdx 2,0,%[port] \n\t" /* Q2 store */
+ "lfpdx 3,0,%[port] \n\t" /* Q3 store */
+ "lfpdx 4,0,%[port] \n\t" /* Q4 store */
+ "lfpdx 5,0,%[port] \n\t" /* Q5 store */
+ "lfpdx 6,0,%[port] \n\t" /* Q6 store */
+ "stfdx 0,0,%[lnkhdr0] \n\t"
+ "lfpdx 7,0,%[port] \n\t" /* Q7 store */
+ "stfsdx 0,0,%[lnkhdr1] \n\t"
+ "lfpdx 8,0,%[port] \n\t" /* Q8 store */
+ "lfpdx 9,0,%[port] \n\t" /* Q9 store */
+ "lfpdx 10,0,%[port] \n\t" /* Q10 store */
+ "lfpdx 11,0,%[port] \n\t" /* Q11 store */
+ "lfpdx 12,0,%[port] \n\t" /* Q12 store */
+ "lwz %[src_key],4(%[lnkhdr0]) \n\t"
+ "lfpdx 13,0,%[port] \n\t" /* Q13 store */
+ "lfpdx 14,0,%[port] \n\t" /* Q14 store */
+ "lfpdx 15,0,%[port] \n\t" /* Q15 store */
+ "lwz %[dummy],0(%[destport]) \n\t" /* trigger to pull the next packet in */
+ : /* outputs */
+ [dummy] "=r" (dummy),
+ [src_key] "=b" (src_key),
+ "=m" (*lnkhdr0),
+ "=m" (*lnkhdr1)
+ : /* Inputs */
+ [port] "b" (port) ,
+ [lnkhdr0] "b" (lnkhdr0) ,
+ [lnkhdr1] "b" (lnkhdr1) ,
+ [destport] "b" (destport)
+ : "fr0", "fr1", "fr2", /* Clobbers */
+ "fr3", "fr4", "fr5",
+ "fr6", "fr7", "fr8",
+ "fr9", "fr10", "fr11",
+ "fr12","fr13", "fr14",
+ "fr15"
+ );
+ TRACEN(k_t_fifocontents, "bgcol_payload_load src_key=%08x",src_key) ;
+ return src_key ;
+ }
+
+/* Save the previous payload to store, and load the next payload from FIFO */
+/* Pipelined store/load: writes the 240-byte payload currently held in
+ * fr1..fr15 to 'payloadptr' while simultaneously refilling fr0 (header
+ * quad) and fr1..fr15 with the next packet from the FIFO port.  The new
+ * header is stored to 'lnkhdr' and its source-key word (lnkhdr+4) is
+ * returned.  index1/index2 are byte-offset temporaries managed inside the
+ * asm; index1 is finally reused for the dummy *destport read that asks the
+ * hardware for the next packet.
+ * Assumes 'lnkhdr' and 'payloadptr' are 16-byte aligned (see the
+ * commented-out BUG_ONs). */
+static inline unsigned int bgcol_payload_storeload(
+ void *port,
+ void *lnkhdr,
+ void * payloadptr,
+ void *destport )
+ {
+ unsigned int index1 ;
+ unsigned int index2 ;
+ unsigned int src_key ;
+ struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ;
+ struct { unsigned char c [16] ; } *lnkhdrc ;
+/* BUG_ON((((int)lnkhdr) & 0xf) != 0) ; */
+/* BUG_ON((((int)payloadptr) & 0xf) != 0) ; */
+
+ lnkhdrc = lnkhdr ;
+
+ payload = payloadptr;
+ TRACEN(k_t_fifocontents, "bgcol_payload_storeload payload=%p",payloadptr) ;
+
+ asm (
+ "lfpdx 0,0,%[port] \n\t" /* lnkhdr */
+ "li %[index1],16 \n\t" /* Indexing values */
+ "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */
+ "li %[index2],32 \n\t" /* Indexing values */
+ "lfpdx 1,0,%[port] \n\t" /* Q1 store */
+ "stfpdx 2,%[index1],%[payload] \n\t" /* F2=Q2 load */
+ "li %[index1],48 \n\t" /* Indexing values */
+ "lfpdx 2,0,%[port] \n\t" /* Q2 store */
+ "stfpdx 3,%[index2],%[payload] \n\t" /* F3=Q3 load */
+ "li %[index2],64 \n\t" /* Indexing values */
+ "lfpdx 3,0,%[port] \n\t" /* Q3 store */
+ "stfpdx 4,%[index1],%[payload] \n\t" /* F4=Q4 load */
+ "li %[index1],80 \n\t" /* Indexing values */
+ "lfpdx 4,0,%[port] \n\t" /* Q4 store */
+ "stfpdx 5,%[index2],%[payload] \n\t" /* F5=Q5 load */
+ "li %[index2],96 \n\t" /* Indexing values */
+ "lfpdx 5,0,%[port] \n\t" /* Q5 store */
+ "stfpdx 6,%[index1],%[payload] \n\t" /* F6=Q6 load */
+ "li %[index1],112 \n\t" /* Indexing values */
+ "lfpdx 6,0,%[port] \n\t" /* Q6 store */
+ "stfpdx 7,%[index2],%[payload] \n\t" /* F7=Q7 load */
+ "li %[index2],128 \n\t" /* Indexing values */
+ "lfpdx 7,0,%[port] \n\t" /* Q7 store */
+ "stfpdx 8,%[index1],%[payload] \n\t" /* F8=Q8 load */
+ "li %[index1],144 \n\t" /* Indexing values */
+ "lfpdx 8,0,%[port] \n\t" /* Q8 store */
+ "stfpdx 0,0,%[lnkhdr] \n\t"
+ "stfpdx 9,%[index2],%[payload] \n\t" /* F9=Q9 load */
+ "li %[index2],160 \n\t" /* Indexing values */
+ "lfpdx 9,0,%[port] \n\t" /* Q9 store */
+ "stfpdx 10,%[index1],%[payload] \n\t" /* F0=Q10 load */
+ "li %[index1],176 \n\t" /* Indexing values */
+ "lfpdx 10,0,%[port] \n\t" /* Q10 store */
+ "stfpdx 11,%[index2],%[payload] \n\t" /* F1=Q11 load */
+ "li %[index2],192 \n\t" /* Indexing values */
+ "lfpdx 11,0,%[port] \n\t" /* Q11 store */
+ "stfpdx 12,%[index1],%[payload] \n\t" /* F2=Q12 load */
+ "li %[index1],208 \n\t" /* Indexing values */
+ "lfpdx 12,0,%[port] \n\t" /* Q12 store */
+ "stfpdx 13,%[index2],%[payload] \n\t" /* F3=Q13 load */
+ "li %[index2],224 \n\t" /* Indexing values */
+ "lfpdx 13,0,%[port] \n\t" /* Q13 store */
+ "lwz %[src_key],4(%[lnkhdr]) \n\t"
+ "stfpdx 14,%[index1],%[payload] \n\t" /* F4=Q14 load */
+ "lfpdx 14,0,%[port] \n\t" /* Q14 store */
+ "stfpdx 15,%[index2],%[payload] \n\t" /* F5=Q15 load */
+ "lfpdx 15,0,%[port] \n\t" /* Q15 store */
+ "lwz %[index1],0(%[destport]) \n\t" /* trigger to pull the next packet in */
+ : /* outputs */
+ [src_key] "=b" (src_key),
+ "=m" (*payload),
+ "=m" (*lnkhdrc),
+ [index1] "=b" (index1),
+ [index2] "=b" (index2)
+ : /* Inputs */
+ [port] "b" (port) ,
+ [payload] "b" (payload),
+ [lnkhdr] "b" (lnkhdrc) ,
+ [destport] "b" (destport)
+ : "fr0", "fr1", "fr2", /* Clobbers */
+ "fr3", "fr4", "fr5",
+ "fr6", "fr7", "fr8",
+ "fr9", "fr10", "fr11",
+ "fr12","fr13", "fr14",
+ "fr15"
+ );
+
+ TRACEN(k_t_fifocontents, "bgcol_payload_storeload src_key=%08x",src_key) ;
+ return src_key ;
+ }
+
+/* Variant of bgcol_payload_storeload() that splits the incoming 16-byte
+ * link header across two 8-byte destinations (stfdx/stfsdx write the two
+ * halves of the fr0 pair to lnkhdr0/lnkhdr1), matching
+ * bgcol_payload_load2().  Otherwise identical pipeline: drains fr1..fr15
+ * to 'payloadptr' while refilling them from the FIFO, returns the new
+ * header's source-key word, and taps *destport for the next packet. */
+static inline unsigned int bgcol_payload_storeload2(
+ void *port,
+ double *lnkhdr0,
+ double *lnkhdr1,
+ void * payloadptr,
+ void *destport )
+ {
+ unsigned int index1 ;
+ unsigned int index2 ;
+ unsigned int src_key ;
+ struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ;
+ /* BUG_ON((((int)lnkhdr0) & 0x07) != 0) ; */
+ /* BUG_ON((((int)lnkhdr1) & 0x07) != 0) ; */
+/* BUG_ON((((int)payloadptr) & 0xf) != 0) ; */
+
+
+ payload = payloadptr;
+ TRACEN(k_t_fifocontents, "bgcol_payload_storeload payload=%p",payloadptr) ;
+
+ asm (
+ "lfpdx 0,0,%[port] \n\t" /* lnkhdr */
+ "li %[index1],16 \n\t" /* Indexing values */
+ "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */
+ "li %[index2],32 \n\t" /* Indexing values */
+ "lfpdx 1,0,%[port] \n\t" /* Q1 store */
+ "stfpdx 2,%[index1],%[payload] \n\t" /* F2=Q2 load */
+ "li %[index1],48 \n\t" /* Indexing values */
+ "lfpdx 2,0,%[port] \n\t" /* Q2 store */
+ "stfpdx 3,%[index2],%[payload] \n\t" /* F3=Q3 load */
+ "li %[index2],64 \n\t" /* Indexing values */
+ "lfpdx 3,0,%[port] \n\t" /* Q3 store */
+ "stfpdx 4,%[index1],%[payload] \n\t" /* F4=Q4 load */
+ "li %[index1],80 \n\t" /* Indexing values */
+ "lfpdx 4,0,%[port] \n\t" /* Q4 store */
+ "stfpdx 5,%[index2],%[payload] \n\t" /* F5=Q5 load */
+ "li %[index2],96 \n\t" /* Indexing values */
+ "lfpdx 5,0,%[port] \n\t" /* Q5 store */
+ "stfpdx 6,%[index1],%[payload] \n\t" /* F6=Q6 load */
+ "li %[index1],112 \n\t" /* Indexing values */
+ "lfpdx 6,0,%[port] \n\t" /* Q6 store */
+ "stfpdx 7,%[index2],%[payload] \n\t" /* F7=Q7 load */
+ "li %[index2],128 \n\t" /* Indexing values */
+ "lfpdx 7,0,%[port] \n\t" /* Q7 store */
+ "stfpdx 8,%[index1],%[payload] \n\t" /* F8=Q8 load */
+ "li %[index1],144 \n\t" /* Indexing values */
+ "lfpdx 8,0,%[port] \n\t" /* Q8 store */
+ "stfdx 0,0,%[lnkhdr0] \n\t"
+ "stfpdx 9,%[index2],%[payload] \n\t" /* F9=Q9 load */
+ "li %[index2],160 \n\t" /* Indexing values */
+ "lfpdx 9,0,%[port] \n\t" /* Q9 store */
+ "stfsdx 0,0,%[lnkhdr1] \n\t"
+ "stfpdx 10,%[index1],%[payload] \n\t" /* F0=Q10 load */
+ "li %[index1],176 \n\t" /* Indexing values */
+ "lfpdx 10,0,%[port] \n\t" /* Q10 store */
+ "stfpdx 11,%[index2],%[payload] \n\t" /* F1=Q11 load */
+ "li %[index2],192 \n\t" /* Indexing values */
+ "lfpdx 11,0,%[port] \n\t" /* Q11 store */
+ "stfpdx 12,%[index1],%[payload] \n\t" /* F2=Q12 load */
+ "li %[index1],208 \n\t" /* Indexing values */
+ "lfpdx 12,0,%[port] \n\t" /* Q12 store */
+ "stfpdx 13,%[index2],%[payload] \n\t" /* F3=Q13 load */
+ "li %[index2],224 \n\t" /* Indexing values */
+ "lfpdx 13,0,%[port] \n\t" /* Q13 store */
+ "lwz %[src_key],4(%[lnkhdr0]) \n\t"
+ "stfpdx 14,%[index1],%[payload] \n\t" /* F4=Q14 load */
+ "lfpdx 14,0,%[port] \n\t" /* Q14 store */
+ "stfpdx 15,%[index2],%[payload] \n\t" /* F5=Q15 load */
+ "lfpdx 15,0,%[port] \n\t" /* Q15 store */
+ "lwz %[index1],0(%[destport]) \n\t" /* trigger to pull the next packet in */
+ : /* outputs */
+ [src_key] "=b" (src_key),
+ "=m" (*payload),
+ "=m" (*lnkhdr0),
+ "=m" (*lnkhdr1),
+ [index1] "=b" (index1),
+ [index2] "=b" (index2)
+ : /* Inputs */
+ [port] "b" (port) ,
+ [payload] "b" (payload),
+ [lnkhdr0] "b" (lnkhdr0) ,
+ [lnkhdr1] "b" (lnkhdr1) ,
+ [destport] "b" (destport)
+ : "fr0", "fr1", "fr2", /* Clobbers */
+ "fr3", "fr4", "fr5",
+ "fr6", "fr7", "fr8",
+ "fr9", "fr10", "fr11",
+ "fr12","fr13", "fr14",
+ "fr15"
+ );
+
+ TRACEN(k_t_fifocontents, "bgcol_payload_storeload src_key=%08x",src_key) ;
+ return src_key ;
+ }
+
+/* Save the previous payload to store */
+/* Writes the 240-byte payload currently sitting in fr1..fr15 (left there
+ * by an earlier payload load/storeload helper) to 'payloadptr'.  The FP
+ * registers appear in the clobber list even though this asm only reads
+ * them; that keeps the compiler from allocating live FP values into them
+ * around the register-passing sequence.  Assumes 'payloadptr' is 16-byte
+ * aligned (commented-out BUG_ON). */
+static inline void bgcol_payload_store(
+ void * payloadptr)
+ {
+ unsigned int index1 ;
+ unsigned int index2 ;
+ struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload=payloadptr ;
+/* BUG_ON((((int)payloadptr) & 0xf) != 0) ; */
+
+ TRACEN(k_t_fifocontents, "bgcol_payload_store payload=%p",payload) ;
+ asm (
+ "li %[index1],16 \n\t" /* Indexing values */
+ "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */
+ "li %[index2],32 \n\t" /* Indexing values */
+ "stfpdx 2,%[index1],%[payload] \n\t" /* F2=Q2 load */
+ "li %[index1],48 \n\t" /* Indexing values */
+ "stfpdx 3,%[index2],%[payload] \n\t" /* F3=Q3 load */
+ "li %[index2],64 \n\t" /* Indexing values */
+ "stfpdx 4,%[index1],%[payload] \n\t" /* F4=Q4 load */
+ "li %[index1],80 \n\t" /* Indexing values */
+ "stfpdx 5,%[index2],%[payload] \n\t" /* F5=Q5 load */
+ "li %[index2],96 \n\t" /* Indexing values */
+ "stfpdx 6,%[index1],%[payload] \n\t" /* F6=Q6 load */
+ "li %[index1],112 \n\t" /* Indexing values */
+ "stfpdx 7,%[index2],%[payload] \n\t" /* F7=Q7 load */
+ "li %[index2],128 \n\t" /* Indexing values */
+ "stfpdx 8,%[index1],%[payload] \n\t" /* F8=Q8 load */
+ "li %[index1],144 \n\t" /* Indexing values */
+ "stfpdx 9,%[index2],%[payload] \n\t" /* F9=Q9 load */
+ "li %[index2],160 \n\t" /* Indexing values */
+ "stfpdx 10,%[index1],%[payload] \n\t" /* F0=Q10 load */
+ "li %[index1],176 \n\t" /* Indexing values */
+ "stfpdx 11,%[index2],%[payload] \n\t" /* F1=Q11 load */
+ "li %[index2],192 \n\t" /* Indexing values */
+ "stfpdx 12,%[index1],%[payload] \n\t" /* F2=Q12 load */
+ "li %[index1],208 \n\t" /* Indexing values */
+ "stfpdx 13,%[index2],%[payload] \n\t" /* F3=Q13 load */
+ "li %[index2],224 \n\t" /* Indexing values */
+ "stfpdx 14,%[index1],%[payload] \n\t" /* F4=Q14 load */
+ "stfpdx 15,%[index2],%[payload] \n\t" /* F5=Q15 load */
+ : /* outputs */
+ "=m" (*payload),
+ [index1] "=b" (index1),
+ [index2] "=b" (index2)
+ : /* Inputs */
+ [payload] "b" (payload) /* inputs */
+ : "fr0", "fr1", "fr2", /* Clobbers */
+ "fr3", "fr4", "fr5",
+ "fr6", "fr7", "fr8",
+ "fr9", "fr10", "fr11",
+ "fr12","fr13", "fr14",
+ "fr15"
+ );
+ }
+
+/* receive 256 bytes, a 16-byte header and a 240-byte payload */
+/* returns the 'source key', the key of the node which sent the data */
+/* The source key's low bits (masked with 'table_index_mask') select a
+ * per-sender payload buffer out of 'payload_table'; the 15 payload quads
+ * are stored into that buffer.  Hard regs r3/r4/r5 are used as scratch
+ * (declared clobbered), and the read of *destport asks the hardware to
+ * present the next packet. */
+static inline int bgcol_payload_receive256(void *port,
+ void *lnkhdr,
+ unsigned char * payload_table[],
+ unsigned int table_index_mask,
+ void *destport )
+ {
+ int table_offset ;
+ int src_key ;
+ struct { unsigned char c [COL_FRAGPAYLOAD] ; } *payload ;
+ struct { unsigned char c [16] ; } *lnkhdrc = lnkhdr ;
+
+ asm (
+ "lfpdx 0,0,%[port] \n\t" /* lnkhdr */
+ "lfpdx 1,0,%[port] \n\t" /* Q1 store */
+ "lfpdx 2,0,%[port] \n\t" /* Q2 store */
+ "lfpdx 3,0,%[port] \n\t" /* Q3 store */
+ "lfpdx 4,0,%[port] \n\t" /* Q4 store */
+ "lfpdx 5,0,%[port] \n\t" /* Q5 store */
+ "lfpdx 6,0,%[port] \n\t" /* Q6 store */
+ "stfpdx 0,0,%[lnkhdr] \n\t"
+ "lfpdx 7,0,%[port] \n\t" /* Q7 store */
+ "lfpdx 8,0,%[port] \n\t" /* Q8 store */
+ "lfpdx 9,0,%[port] \n\t" /* Q9 store */
+ "lwz %[src_key],4(%[lnkhdr]) \n\t"
+ "lfpdx 10,0,%[port] \n\t" /* Q10 store */
+ "lfpdx 11,0,%[port] \n\t" /* Q11 store */
+ "lfpdx 12,0,%[port] \n\t" /* Q12 store */
+ "and 3,%[src_key],%[table_index_mask] \n\t"
+ "lfpdx 13,0,%[port] \n\t" /* Q13 store */
+ "slwi %[table_offset],3,2 \n\t"
+ "lfpdx 14,0,%[port] \n\t" /* Q14 store */
+ "lwzx %[payload],%[table_offset],%[payload_table] \n\t"
+ "lfpdx 15,0,%[port] \n\t" /* Q15 store */
+ "lwz 5,0(%[destport]) \n\t" /* trigger to pull the next packet in */
+ "li 3,16 \n\t" /* Indexing values */
+ "stfpdx 1,0,%[payload] \n\t" /* F1=Q1 load from (%[remaining_quads]) */
+ "li 4,32 \n\t" /* Indexing values */
+ "stfpdx 2,3,%[payload] \n\t" /* F2=Q2 load */
+ "li 3,48 \n\t" /* Indexing values */
+ "stfpdx 3,4,%[payload] \n\t" /* F3=Q3 load */
+ "li 4,64 \n\t" /* Indexing values */
+ "stfpdx 4,3,%[payload] \n\t" /* F4=Q4 load */
+ "li 3,80 \n\t" /* Indexing values */
+ "stfpdx 5,4,%[payload] \n\t" /* F5=Q5 load */
+ "li 4,96 \n\t" /* Indexing values */
+ "stfpdx 6,3,%[payload] \n\t" /* F6=Q6 load */
+ "li 3,112 \n\t" /* Indexing values */
+ "stfpdx 7,4,%[payload] \n\t" /* F7=Q7 load */
+ "li 4,128 \n\t" /* Indexing values */
+ "stfpdx 8,3,%[payload] \n\t" /* F8=Q8 load */
+ "li 3,144 \n\t" /* Indexing values */
+ "stfpdx 9,4,%[payload] \n\t" /* F9=Q9 load */
+ "li 4,160 \n\t" /* Indexing values */
+ "stfpdx 10,3,%[payload] \n\t" /* F0=Q10 load */
+ "li 3,176 \n\t" /* Indexing values */
+ "stfpdx 11,4,%[payload] \n\t" /* F1=Q11 load */
+ "li 4,192 \n\t" /* Indexing values */
+ "stfpdx 12,3,%[payload] \n\t" /* F2=Q12 load */
+ "li 3,208 \n\t" /* Indexing values */
+ "stfpdx 13,4,%[payload] \n\t" /* F3=Q13 load */
+ "li 4,224 \n\t" /* Indexing values */
+ "stfpdx 14,3,%[payload] \n\t" /* F4=Q14 load */
+ "stfpdx 15,4,%[payload] \n\t" /* F5=Q15 load */
+ : [payload] "=b" (payload), /* outputs */
+ [src_key] "=b" (src_key),
+ [table_offset] "=b" (table_offset),
+ "=m" (*payload),
+ "=m" (*lnkhdrc)
+ : /* Inputs */
+ [port] "b" (port) ,
+ [lnkhdr] "b" (lnkhdrc) ,
+ [payload_table] "b" (payload_table) ,
+ [table_index_mask] "b" (table_index_mask) ,
+ [destport] "b" (destport)
+ : "fr0", "fr1", "fr2", /* Clobbers */
+ "fr3", "fr4", "fr5",
+ "fr6", "fr7", "fr8",
+ "fr9", "fr10", "fr11",
+ "fr12","fr13", "fr14",
+ "fr15",
+ "r3" , "r4", "r5"
+ );
+ TRACEN(k_t_fifocontents, "bgcol_payload_receive256 table_offset=%08x payload=%p\n src_key=%08x",table_offset,payload,src_key) ;
+ return src_key ;
+ }
+/**********************************************************************
+ * Receive and transmit
+ **********************************************************************/
+
+/* #if defined(COLLECTIVE_DELIVER_VIA_TASKLET) */
+/* static void bgcol_receive_proto_tasklet_handler(unsigned long dummy) */
+/* { */
+/* struct bg_col *bgcol = __bgcol; */
+/* struct sk_buff *skb = skb_dequeue(&bgcol->fragskb_list_rcv); */
+/* */
+/* TRACE("bgnet: (>)bgcol_receive_proto_tasklet_handler"); */
+/* */
+/* while( skb ) */
+/* { */
+// /* deliver to upper protocol layers */
+/* struct bglink_hdr_col *lnkhdrp = (struct bglink_hdr_col *)&(skb->cb) ; */
+/* struct bglink_proto *proto; */
+/* proto = bgcol_find_linkproto(lnkhdrp->lnk_proto); */
+/* if (proto) */
+/* { */
+/* */
+/* TRACE("Handed to proto rcv=%p", proto->rcv) ; */
+/* TRACE("hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x proto=%x", lnkhdrp->conn_id, lnkhdrp->this_pkt, lnkhdrp->total_pkt, lnkhdrp->dst_key, lnkhdrp->src_key, lnkhdrp->lnk_proto); */
+/* dump_skb_partial(skb,64) ; */
+/* TRACE("proto->rcv=%p skb=%p lnkhdrp=%p proto=%p", */
+/* proto->rcv,skb, lnkhdrp, proto */
+/* ) ; */
+/* (void) proto->rcv(skb, lnkhdrp, proto); */
+/* } */
+/* else */
+/* { */
+/* dump_skb_partial(skb,64); */
+/* TRACE("bgcol: unsupported link protocol (%p) %x", proto, lnkhdrp->lnk_proto); */
+/* dev_kfree_skb(skb); */
+/* } */
+/* skb = skb_dequeue(&bgcol->fragskb_list_rcv) ; */
+/* } */
+/* */
+/* TRACE("bgnet: (<)bgcol_receive_proto_tasklet_handler"); */
+/* */
+/* } */
+/* */
+/* static DECLARE_TASKLET(bgcol_receive_proto_tasklet,bgcol_receive_proto_tasklet_handler,0); */
+/* #endif */
+
+/* Mark receive-reassembly slot 'slot' as unused. */
+static inline void bgcol_vacate_slot(struct bg_col *bgcol, unsigned int slot)
+ {
+ /* Poison the payload pointer and sequence word so a stray use of a
+ * vacated slot traps / mismatches rather than silently succeeding. */
+ bgcol->skb_rcv_table[slot] = NULL ;
+ bgcol->per_eth_table[slot].expect = 0xffffffff ;
+ bgcol->per_eth_table[slot].payload = (void *)0xffffffff ;
+ TRACEN(k_t_general,"Slot %d vacated",slot );
+ }
+
+
+/* Put every reassembly slot into the 'vacant' state at start-up. */
+static void init_ethkey_table(struct bg_col *bgcol)
+ {
+ int slot_index ;
+ for( slot_index = 0 ; slot_index < k_ethkey_table_size ; ++slot_index )
+ bgcol_vacate_slot(bgcol,slot_index) ;
+ }
+
+#if defined(KEEP_LNKHDR_TRAIL)
+static struct bglink_hdr_col lnkhdr_trail[k_lnkhdr_trail_length] ;
+static unsigned int lnkhdr_trail_index ;
+static unsigned int lnkhdr_trail_shown_index ;
+static int trail_shown_count ;
+
+/* Append a copy of 'lnkhdr' to the circular first-failure-data-capture
+ * trail (k_lnkhdr_trail_length entries, power of two). */
+static void record_lnkhdr_trail(struct bglink_hdr_col *lnkhdr)
+ {
+ lnkhdr_trail[lnkhdr_trail_index & (k_lnkhdr_trail_length-1)] = *lnkhdr ;
+ lnkhdr_trail_index += 1 ;
+ }
+
+/* Dump the most recent link headers from the trail to the kernel log,
+ * tagged with 'reason'.  Output is rate-limited by trail_shown_count
+ * (capped at k_lnhhdr_ffdc_limit) and entries already shown by a previous
+ * dump are skipped via lnkhdr_trail_shown_index. */
+static void show_lnkhdr_trail(const char * reason)
+ {
+ if( trail_shown_count < k_lnhhdr_ffdc_limit )
+ {
+ /* Show at most k_lnkhdr_trail_display_length entries, fewer if the
+ * trail hasn't wrapped yet. */
+ unsigned int trail_count = (k_lnkhdr_trail_display_length > lnkhdr_trail_index) ? lnkhdr_trail_index : k_lnkhdr_trail_display_length ;
+ unsigned int current_index = lnkhdr_trail_index - trail_count ;
+ printk(KERN_INFO "lnkhdr trail to packet %d, reason <%s>:\n", lnkhdr_trail_index, reason) ;
+ while( current_index != lnkhdr_trail_index)
+ {
+ unsigned int x = ( current_index & (k_lnkhdr_trail_length-1)) ;
+ if( current_index >= lnkhdr_trail_shown_index )
+ {
+ printk(KERN_INFO "lnkhdr_trail[%02x] dst_key=%08x src_key=%08x conn_id=%04x this_pkt=%02x total_pkt=%02x lnk_proto=%04x opt=[%02x:%02x:%02x]\n",
+ (current_index-lnkhdr_trail_index) & 0xff,
+ lnkhdr_trail[x].dst_key,
+ lnkhdr_trail[x].src_key,
+ lnkhdr_trail[x].conn_id,
+ lnkhdr_trail[x].this_pkt,
+ lnkhdr_trail[x].total_pkt,
+ lnkhdr_trail[x].lnk_proto,
+ lnkhdr_trail[x].opt.opt_net.option,
+ lnkhdr_trail[x].opt.opt_net.pad_head,
+ lnkhdr_trail[x].opt.opt_net.pad_tail
+ ) ;
+ }
+ current_index += 1 ;
+
+ }
+ trail_shown_count += 1 ;
+ lnkhdr_trail_shown_index = lnkhdr_trail_index ;
+ }
+ }
+
+/* Hex-dump a 240-byte payload to the kernel log: seven rows of eight
+ * 32-bit words followed by one row of four.  Rate-limited by the same
+ * FFDC counter as the lnkhdr trail.  'mioaddr' is unused in this
+ * (KEEP_LNKHDR_TRAIL) variant. */
+static void show_payload(void * payload, unsigned int mioaddr)
+{
+ if( trail_shown_count < k_lnhhdr_ffdc_limit )
+ {
+ unsigned int *pi=(unsigned int *) payload ;
+ unsigned int x ;
+ for(x=0; x<240/sizeof(unsigned int)-9; x+=8)
+ {
+ printk(KERN_INFO "payload [%08x %08x %08x %08x %08x %08x %08x %08x]\n",
+ pi[x],pi[x+1],pi[x+2],pi[x+3],pi[x+4],pi[x+5],pi[x+6],pi[x+7]
+ ) ;
+ } ;
+ printk(KERN_INFO "payload [%08x %08x %08x %08x]\n",
+ pi[x],pi[x+1],pi[x+2],pi[x+3]
+ ) ;
+ }
+}
+#else
+/* Stub variants used when KEEP_LNKHDR_TRAIL is not configured: recording
+ * is a no-op and the 'show' helpers reduce to trace statements. */
+static inline void record_lnkhdr_trail(struct bglink_hdr_col *lnkhdr)
+ {
+
+ }
+static inline void show_lnkhdr_trail(const char * reason)
+ {
+ TRACE("%s", reason);
+ }
+static void show_payload(void * payload, unsigned int mioaddr)
+{
+ TRACE("payload=%p mioaddr=0x%08x", payload, mioaddr);
+}
+
+#endif
+
+#if !defined(COLLECTIVE_DELIVER_VIA_TASKLET)
+/* Hand a completed frame (skb) up to the registered link protocol.  When
+ * bgcol->deliver_without_workqueue is clear, the skb is queued to a work
+ * queue instead of being delivered inline; otherwise the protocol's
+ * col_rcv callback is invoked directly (or the skb is dropped if no
+ * protocol matches lnkhdr->lnk_proto), and the fill list is replenished. */
+static inline void bgcol_deliver_directly(struct bg_col *bgcol,struct bglink_hdr_col *lnkhdr, struct sk_buff *skb)
+ {
+ struct bglink_proto *proto;
+
+ /* deliver to upper protocol layers */
+ proto = bglink_find_proto(lnkhdr->lnk_proto);
+ if(!bgcol->deliver_without_workqueue)
+ {
+ TRACEN(k_t_general,"Delivering skb=%p via work queue",skb) ;
+ bgcol_deliver_via_workqueue(skb, lnkhdr,proto) ;
+
+ }
+ else
+ {
+ if (proto)
+ {
+ TRACE("Handed to proto=%p", proto) ;
+ TRACE("hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x proto=%x", lnkhdr->conn_id, lnkhdr->this_pkt, lnkhdr->total_pkt, lnkhdr->dst_key, lnkhdr->src_key, lnkhdr->lnk_proto);
+ dump_skb_partial(skb,64) ;
+ TRACE("proto->col_rcv=%p skb=%p lnkhdr=%p proto=%p",
+ proto->col_rcv,skb, lnkhdr, proto
+ ) ;
+ (void) proto->col_rcv(bgcol,skb, lnkhdr, proto);
+ /* enable_kernel_fp() ; */
+ }
+ else
+ {
+ dump_skb_partial(skb,64);
+ TRACE("bgcol: unsupported link protocol (%p) %x", proto, lnkhdr->lnk_proto);
+ dev_kfree_skb(skb);
+ }
+ replenish_list_for_filling(bgcol) ;
+ }
+ }
+#endif
+
+/* Scratch buffer for payloads that must be drained from the FP registers
+ * but then discarded (out-of-sequence or oversized packets). */
+static char scratch_payload[COL_FRAGPAYLOAD] __attribute__((aligned(16)));
+/* Fast-path receive for the collective-network FIFO at 'mioaddr'.
+ * Streams packets through the FP registers: the inner loop keeps one
+ * packet in fr0..fr15 while its link header decides which per-sender
+ * reassembly slot it extends.  In-sequence mid-frame packets are stored
+ * straight into the destination skb; any sequence break drops out of the
+ * loop and is handled as frame-complete, frame-start, or error.
+ * Returns the number of packets unloaded, with bit 31 set as a hint that
+ * a frame just completed and the FIFO appears drained. */
+static inline int bgcol_receive_mark3(struct bg_col *bgcol, unsigned channel,unsigned int status_in, unsigned int mioaddr)
+{
+ void *payloadptr;
+/* union bgcol_status status; */
+ unsigned int unload_count ;
+ unsigned int unload_index ;
+ struct bglink_hdr_col lnkhdr __attribute__((aligned(8)));
+ double *lnkhdrd = (double *)&lnkhdr ;
+ unsigned int total_unload_count = 0 ;
+ unsigned int end_frame_hint = 0 ;
+#if defined(KEEP_RECV_TOTAL)
+ unsigned int recv_total = bgcol->recv_total ;
+#endif
+/* bgcol->recv_total += total_unload_count ; */
+
+/* status.raw = status_in ; */
+/* unload_count = status.x.rcv_hdr ; */
+ unload_count = bgcol_status_rcv_hdr(status_in) ;
+/* bgcol->recv_fifo_histogram2[unload_count & 0x0f ] += 1; */
+ TRACE("status=%08x", status_in);
+
+#if defined(KEEP_RECV_TOTAL)
+ bgcol->recv_total = recv_total + unload_count ; /* Not exact, for the case where we exit the loop early, but good enought for statistics */
+#endif
+/* One-pass builds take a single trip; otherwise keep going until the
+ * re-read status says the FIFO is empty. */
+#if defined(COLLECTIVE_ONEPASS_TXRX)
+ if(unload_count > 0)
+#else
+ while(unload_count > 0)
+#endif
+ {
+ unsigned int received_src_key ;
+ unsigned int slot ;
+ unsigned int received_seq ;
+ unsigned int expected_seq ;
+
+ unsigned int seq_next_packet ;
+ unsigned int seq_tot_packet ;
+ unsigned char * deposited_payload ;
+ unsigned char * next_payload ;
+ unsigned int received ;
+ unsigned int expected ;
+
+ /* Load up the FP regs with the first packet from the FIFO, and get ready to analyze it */
+ /* NOTE(review): '(void*)mioaddr + _BGP_TRx_DR' relies on GCC void*
+ * arithmetic; it is equivalent to the (void*)(mioaddr + ...) form
+ * used for the destport argument -- presumably intentional. */
+ received_src_key=bgcol_payload_load2((void*)mioaddr + _BGP_TRx_DR,lnkhdrd,lnkhdrd+1,(void*)(mioaddr + _BGP_TRx_HR)) ;
+#if defined(KEEP_LNKHDR_TRAIL)
+ record_lnkhdr_trail(&lnkhdr) ;
+#endif
+ slot = received_src_key & (k_ethkey_table_size-1) ;
+ received = ((unsigned int *)&lnkhdr)[2] ;
+ expected = bgcol->per_eth_table[slot].expect ;
+ /* Find if it was an 'expected' packet in context of previous packets from this source */
+ received_seq = ( received >> 8 ) & 0xff ;
+ expected_seq = ( expected >> 8 ) & 0xff ;
+ seq_tot_packet = expected & 0xff ;
+ seq_next_packet = expected_seq + 1 ;
+
+ bgcol->per_eth_table[slot].expect = expected + 0x0100 ;
+
+ deposited_payload = bgcol->per_eth_table[slot].payload ;
+ next_payload = deposited_payload + COL_FRAGPAYLOAD ;
+
+ TRACEN(k_t_detail,"slot=%08x seq(%x,%x) re(%08x,%08x)",
+ slot,
+ received_seq, expected_seq,
+ received, expected
+ ) ;
+
+ if( ( received == expected ) && (seq_next_packet < seq_tot_packet) )
+ {
+ bgcol->per_eth_table[slot].payload = next_payload ;
+ for(unload_index=1;unload_index<unload_count;unload_index+=1)
+ {
+ /* This is the busiest loop. Keep it simple .... */
+ /* save the payload to store, and load up the next one */
+ received_src_key=bgcol_payload_storeload2(
+ (void*)mioaddr + _BGP_TRx_DR,
+ lnkhdrd,
+ lnkhdrd+1,
+ deposited_payload,
+ (void*)(mioaddr + _BGP_TRx_HR)) ;
+#if defined(KEEP_LNKHDR_TRAIL)
+ record_lnkhdr_trail(&lnkhdr) ;
+#endif
+ slot = received_src_key & (k_ethkey_table_size-1) ;
+ received = ((unsigned int *)&lnkhdr)[2] ;
+ expected = bgcol->per_eth_table[slot].expect ;
+ /* Find if it was an 'expected' packet in context of previous packets from this source */
+ expected_seq = ( expected >> 8 ) & 0xff ;
+ seq_tot_packet = expected & 0xff ;
+ deposited_payload = bgcol->per_eth_table[slot].payload ;
+
+ seq_next_packet = expected_seq + 1 ;
+
+
+ next_payload = deposited_payload + COL_FRAGPAYLOAD ;
+
+ TRACEN(k_t_detail,"slot=%08x seq(%x,%x) re(%08x,%08x)",
+ slot,
+ received_seq, expected_seq,
+ received, expected
+ ) ;
+ if( received != expected ) break ;
+ bgcol->per_eth_table[slot].payload = next_payload ;
+ bgcol->per_eth_table[slot].expect = expected + 0x0100 ;
+ if( seq_next_packet >= seq_tot_packet ) break ;
+ }
+ total_unload_count += unload_index ;
+ }
+ else
+ {
+ total_unload_count += 1 ;
+ }
+
+ TRACE("slot=%08x seq(%x,%x) re(%08x,%08x)",
+ slot,
+ received_seq, expected_seq,
+ received, expected
+ ) ;
+
+/* We have registers loaded, and we have exited the busy loop for one of a number of reasons
+ * 1) This is the last packet of a frame
+ * 2) We have unloaded everything that the status word said was in the FIFO
+ * 3) This packet doesn't continue the previous frame from this source properly
+ * a) This is the first packet of a frame, and there was no frame in progress
+ * b) The sender has sent packets in a sequence that we do not understand
+ *
+ * Diagnose which, and handle appropriately
+ */
+ end_frame_hint = 0 ;
+ if( received == expected && ((unsigned int)deposited_payload) != 0xffffffff )
+ {
+ /* Things are going well, put the payload down into memory, and work out what to do with it */
+ TRACE("Putting payload down at %p", deposited_payload);
+
+ bgcol_payload_store(deposited_payload) ;
+ if( seq_next_packet >= seq_tot_packet)
+ {
+ /* Frame is complete. Deliver it up a layer */
+ struct sk_buff *skb = bgcol->skb_rcv_table[slot] ;
+ if( seq_next_packet > seq_tot_packet)
+ {
+ TRACEN(k_t_request,"(!!!) seq_next_packet=%d seq_tot_packet=%d",
+ seq_next_packet, seq_tot_packet) ;
+ }
+/* BUG_ON(seq_next_packet > seq_tot_packet) ; // we think we checked this as we went along; firewall report here */
+ TRACEN(k_t_general,"Frame is complete");
+ #if defined(COLLECTIVE_DELIVER_VIA_TASKLET)
+ skb_queue_tail(&bgcol->fragskb_list_rcv, skb) ;
+ TRACEN(k_t_general,"scheduling proto tasklet");
+ tasklet_schedule(&bgcol_receive_proto_tasklet);
+ #else
+ bgcol_deliver_directly(bgcol,&lnkhdr, skb) ;
+ #endif
+ /* and tag the slot as vacant */
+ bgcol_vacate_slot(bgcol,slot) ;
+ /* 'break' here should cause the interrupt handler to return */
+ /* this CPU can then deliver the frame and the next CPU can take up */
+ /* draining the bgcol */
+#if defined(COLLECTIVE_BREAK_ON_FRAME)
+ break ;
+#endif
+ end_frame_hint = 1 ;
+ }
+ }
+ else
+ {
+ if( received == expected )
+ {
+ /* Packet looked good, but destination address was 0xffffffff. Diagnose it ... */
+ TRACEN(k_t_protocol,"Unexpected dest address 0xffffffff, received=0x%08x", received) ;
+ TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key);
+ }
+ /* The packet wasn't in sequence with previous packets from the source. Look to see if we can handle it */
+ if( 0 == lnkhdr.this_pkt )
+ {
+ if ( lnkhdr.total_pkt * COL_FRAGPAYLOAD + COL_SKB_ALIGN <= bgcol->mtu)
+ {
+ if( 1 == lnkhdr.total_pkt )
+ {
+ struct sk_buff *skb = bgcol->skb_mini ;
+ /* We have a single-packet frame. Use 'skb_mini' and send it on */
+ if( skb )
+ {
+ skb_reserve(skb, COL_SKB_ALIGN - ((unsigned int)(skb->data)) % COL_SKB_ALIGN);
+ payloadptr = skb_put(skb, COL_FRAGPAYLOAD);
+ TRACE("Putting payload in mini slot at %p", payloadptr);
+ bgcol_payload_store(payloadptr) ;
+ #if defined(COLLECTIVE_DELIVER_VIA_TASKLET)
+ /* NOTE(review): 'kept_lnkhdrp' has no declaration in this
+ * scope -- this branch looks like it would fail to compile
+ * with COLLECTIVE_DELIVER_VIA_TASKLET defined; confirm. */
+ kept_lnkhdrp = (struct bglink_hdr_col *)(&(skb->cb)) ;
+ *kept_lnkhdrp = lnkhdr ;
+ skb_queue_tail(&bgcol->fragskb_list_rcv, skb) ;
+ TRACE("scheduling proto tasklet");
+ tasklet_schedule(&bgcol_receive_proto_tasklet);
+ #else
+ bgcol_deliver_directly(bgcol,&lnkhdr, skb) ;
+ #endif
+ }
+/* bgcol->skb_mini = alloc_skb(COL_FRAGPAYLOAD + COL_SKB_ALIGN , GFP_KERNEL | GFP_ATOMIC ) ; */
+ bgcol->skb_mini = take_skb_from_list_for_filling(bgcol) ;
+ end_frame_hint = 1 ;
+ /* If there was a partial frame in the underneath skbuff, it can be left for */
+ /* completion later. This doesn't seem likely; but the receive logic will work for it. */
+ }
+ else
+ {
+ /* Put the payload down at the beginning of the skb we had up our sleeve */
+ struct sk_buff *skb = bgcol->skb_in_waiting ;
+ if( skb && (skb_tailroom(skb) >= lnkhdr.total_pkt * COL_FRAGPAYLOAD + COL_SKB_ALIGN ) )
+ {
+ struct bglink_hdr_col *kept_lnkhdrp ;
+ int size = lnkhdr.total_pkt * COL_FRAGPAYLOAD ;
+ skb_reserve(skb, COL_SKB_ALIGN - ((unsigned int)(skb->data)) % COL_SKB_ALIGN);
+ payloadptr = skb_put(skb, size);
+ kept_lnkhdrp = (struct bglink_hdr_col *)(&(skb->cb)) ;
+ *kept_lnkhdrp = lnkhdr ;
+ TRACE("Putting payload in waiting slot at %p", payloadptr);
+ bgcol_payload_store(payloadptr) ;
+ }
+ else
+ {
+ if( skb ) dev_kfree_skb(skb) ; /* Maybe someone upped the MTU on us */
+ skb = NULL ;
+ }
+/* bgcol->skb_in_waiting = alloc_skb( */
+/* k_use_plentiful_skb ? k_plentiful_skb_size : bgcol->mtu */
+// , GFP_KERNEL | GFP_ATOMIC); /* And grab a new one */
+ bgcol->skb_in_waiting = take_skb_from_list_for_filling(bgcol) ;
+ if( skb )
+ {
+ /* If there's a part-arrived frame, trample it */
+ if( bgcol->skb_rcv_table[slot] )
+ {
+ TRACEN(k_t_protocol,"Dropping previous partial frame");
+ TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key);
+ TRACEN(k_t_protocol,"expected slot=%d re=(%08x,%08x)", slot, received, expected);
+ show_lnkhdr_trail("partial frame") ;
+ {
+ struct bgnet_dev *bgnet = bgcol->bgnet ;
+ bgnet->stats.rx_errors += 1;
+ bgnet->stats.rx_missed_errors += 1;
+ }
+
+ dev_kfree_skb(bgcol->skb_rcv_table[slot]) ;
+ }
+
+
+
+ /* Set things up for the fast loop */
+ bgcol->skb_rcv_table[slot]=skb ;
+ bgcol->per_eth_table[slot].payload = payloadptr+COL_FRAGPAYLOAD ;
+ bgcol->per_eth_table[slot].expect = (lnkhdr.conn_id << 16) | (1 << 8) | (lnkhdr.total_pkt) ;
+ TRACE("Saved first packet of new frame, next bgcol->per_eth_table[%d]={%p,%08x}", slot, bgcol->per_eth_table[slot].payload,bgcol->per_eth_table[slot].expect);
+
+ }
+
+ else
+ {
+ TRACEN(k_t_protocol,"No skbuff memory available, dropping packet");
+ TRACEN(k_t_protocol,"slot=%d hdr: conn=%x, this_pkt=%x, tot_pkt=%x, dst=%x, src=%x", slot, lnkhdr.conn_id, lnkhdr.this_pkt, lnkhdr.total_pkt, lnkhdr.dst_key, lnkhdr.src_key);
+ bgcol->recv_no_skbuff += 1 ;
+ bgcol->bgnet->stats.rx_dropped += 1;
+ bgcol->bgnet->stats.rx_errors += 1;
+ }
+ }
+ }
+
+ else
+ {
+ bgcol_payload_store(scratch_payload) ;
+ TRACEN(k_t_protocol,"Frame larger than MTU, dropping");
+ show_lnkhdr_trail("Frame larger than MTU") ;
+ show_payload(scratch_payload,mioaddr) ;
+ bgcol->bgnet->stats.rx_errors += 1;
+ bgcol->bgnet->stats.rx_over_errors += 1;
+ }
+ }
+
+ else
+ {
+ /* Unexpected mid-frame packet */
+ bgcol_payload_store(scratch_payload) ;
+ TRACEN(k_t_protocol,"Unexpected packet from middle of frame, dropping");
+ show_lnkhdr_trail("Unexpected packet from middle of frame") ;
+ show_payload(scratch_payload,mioaddr) ;
+ bgcol->bgnet->stats.rx_errors += 1;
+ bgcol->bgnet->stats.rx_fifo_errors += 1;
+ }
+ }
+
+
+ /* We have handled the reason why the 'fast loop' dropped out. Refresh the status */
+#if !defined(COLLECTIVE_ONEPASS_TXRX)
+ /* and redrive the 'fast loop' if there is anything in the fifo. */
+/* status.raw = in_be32_nosync((unsigned*)(mioaddr + _BGP_TRx_Sx)); */
+/* unload_count = status.x.rcv_hdr ; */
+ unload_count = bgcol_status_rcv_hdr(*(unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+/* bgcol->recv_fifo_histogram3[unload_count & 0x0f ] += 1; */
+#endif
+ }
+/* bgcol->recv_total += total_unload_count ; */
+/* Return the number of packets we unloaded, and set the high bit if we have */
+/* reason to think there's nothing coming in any time soon */
+ return total_unload_count
+ | ( ( end_frame_hint && (unload_count == total_unload_count ) )
+ ? 0x80000000 : 0
+ ) ;
+
+}
+
+
+
+/* Attempting to free skbuffs in an interrupt handler doesn't work well, some 'destructor' callbacks */
+/* protest if they are driven at interrupt level. So we queue them to be freed later. */
+#ifndef COLLECTIVE_TRANSMIT_WITH_SLIH
+static void bgcol_completed_buffer_handler(unsigned long dummy)
+ {
+ struct bg_col* bgcol=__bgcol ;
+ TRACE("(>)[%s:%d]",__func__, __LINE__) ;
+ /* Free any skbufs the transmit interrupt handler has finished with */
+ {
+ struct sk_buff *freeskb = skb_dequeue(&(bgcol->skb_list_free) ) ;
+ while (freeskb)
+ {
+ TRACEN(k_t_irqflow,"Freeing skb=%p", freeskb) ;
+ dump_skb_partial(freeskb,64) ;
+ dev_kfree_skb(freeskb) ;
+ freeskb = skb_dequeue(&(bgcol->skb_list_free) ) ;
+ }
+ }
+ TRACE("(<)[%s:%d]",__func__, __LINE__) ;
+ }
+static DECLARE_TASKLET(bgcol_completed_buffer_tasklet,bgcol_completed_buffer_handler,0) ;
+#endif
+
+/* static char local_payload[COL_FRAGPAYLOAD] __attribute__((aligned(16))) ; */
+/* Stage the next frame for transmission on the collective network.
+ * Dequeues one skb from skb_list_xmit (if any), chooses the hardware
+ * routing header — broadcast, direct point-to-point, or bounced via the
+ * IO-node bridge for reflection/onward ethernet transmission — from the
+ * ethernet destination address and from whether this node is a compute
+ * node, then fills in bgcol->lnkhdr_xmit / dest_xmit / current_xmit_*
+ * so that bgcol_xmit_push_packets() can inject the fragments. */
+static void bgcol_xmit_next_skb(struct bg_col* bgcol)
+ {
+ if(! skb_queue_empty(&(bgcol->skb_list_xmit)))
+ {
+ struct sk_buff *skb = skb_dequeue(&(bgcol->skb_list_xmit) ) ;
+ struct bgnet_dev *bgnet = bgcol->bgnet ;
+ /* Nonzero iff this node's tree address differs from the IO-node bridge
+ * address (low 24 bits), i.e. we are a compute node, not the IO node */
+ unsigned int i_am_compute_node = (bgnet->bgcol_vector ^ bgnet->eth_bridge_vector) & 0x00ffffff ;
+ TRACE("bgcol_xmit_next_skb bgcol_vector=0x%08x eth_bridge_vector=0x%08x i_am_compute_node=%08x",
+ bgnet->bgcol_vector,bgnet->eth_bridge_vector,i_am_compute_node
+ ) ;
+ bgcol->skb_current_xmit=skb ;
+ if( skb )
+ {
+ unsigned long offset;
+ union bgcol_header dest ;
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ /* Work out what bgcol header to use for the new skb */
+
+ TRACEN(k_t_irqflow,"%s: skb=%p, eth=%p, bgnet=%p, len=%d", __FUNCTION__, skb, eth, bgnet, skb->len);
+ dump_skb_partial(skb, 64) ;
+ dest.raw = 0 ;
+ dest.p2p.pclass = bgnet->bgcol_route;
+
+ if (is_broadcast_ether_addr(eth->h_dest)) {
+ /* May have to go to the IO node for broadcasting */
+ if(0 == i_am_compute_node)
+ {
+ TRACE("broadcasting from IO node") ;
+ dest.bcast.tag = 0;
+ bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol;
+ }
+ else
+ {
+ TRACE("sending to IO node for broadcast") ;
+ dest.p2p.vector = bgnet->eth_bridge_vector;
+ dest.p2p.p2p = 1;
+ bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_reflector_protocol;
+ }
+ } else {
+ TRACE("bgcol_xmit_next_skb bgnet->bgcol_vector=%08x bgnet->eth_bridge_vector=%08x",bgnet->bgcol_vector,bgnet->eth_bridge_vector) ;
+ /* NOTE(review): reads 32 bits straight out of the MAC address bytes;
+ * relies on the platform tolerating the unaligned access — confirm */
+ if (bgnet->eth_mask == 0 ||
+ ((bgnet->eth_mask & *(unsigned int *)(&eth->h_dest[0])) ==
+ (bgnet->eth_local))) {
+ if(0 == i_am_compute_node)
+ {
+ TRACE("sending to compute node") ;
+ dest.p2p.vector = *(unsigned int *)(&eth->h_dest[2]);
+ bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol;
+ }
+ else
+ {
+ dest.p2p.vector = bgnet->eth_bridge_vector;
+ if(( bgnet->eth_bridge_vector ^ (*(unsigned int *)(&eth->h_dest[2]))) & 0x00ffffff)
+ {
+ TRACE("sending to IO node for reflection") ;
+ bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_reflector_protocol;
+ }
+ else
+ {
+ TRACE("sending to IO node as final destination") ;
+ bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol;
+ }
+ }
+ } else {
+ TRACE("sending to IO node for onward transmission") ;
+ dest.p2p.vector = bgnet->eth_bridge_vector;
+ bgcol->lnkhdr_xmit.lnk_proto = bgnet->bgcol_protocol;
+ }
+ dest.p2p.p2p = 1;
+ }
+
+ /* initialize link layer */
+ bgcol->lnkhdr_xmit.dst_key = eth_to_key(eth->h_dest);
+ bgcol->lnkhdr_xmit.src_key = bgnet->bgcol_vector;
+
+ /* pad out head of packet so it starts at a 16 Byte boundary */
+ offset = ((unsigned long)skb->data) & 0xf;
+ bgcol->lnkhdr_xmit.opt.opt_net.pad_head = offset;
+ bgcol->lnkhdr_xmit.opt.opt_net.pad_tail = (COL_FRAGPAYLOAD - ((skb->len + offset) % COL_FRAGPAYLOAD)) % COL_FRAGPAYLOAD;
+ bgcol->current_xmit_data=skb->data - offset ;
+ bgcol->current_xmit_len=skb->len + offset ;
+ /* prepare link header */
+ bgcol->lnkhdr_xmit.conn_id = bgcol->curr_conn++;
+ bgcol->lnkhdr_xmit.total_pkt = ((skb->len + offset - 1) / COL_FRAGPAYLOAD) + 1;
+ bgcol->lnkhdr_xmit.this_pkt = 0;
+ TRACE("%s: dst_key=%08x src_key=%08x lnk_proto=%d conn_id=%d total_pkt=%d pad_head=%d pad_tail=%d", __FUNCTION__,
+ bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, bgcol->lnkhdr_xmit.lnk_proto, bgcol->lnkhdr_xmit.conn_id, bgcol->lnkhdr_xmit.total_pkt, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail );
+ bgcol->fragidx_xmit = 0 ;
+ bgcol->dest_xmit = dest ;
+ TRACEN(k_t_lowvol,"bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d",
+ bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail);
+ }
+ }
+ }
+
+/* Push packets in until we finish the skb or the fifo fills */
+/* Returns 2 if we would like to push something into the fifo but cannot because it is full */
+/* Returns 1 if we pushed something into the fifo */
+/* Inject as many fragments of the current skb as the hardware send FIFO
+ * will accept.  Payload moves use FPU quad-word transfers (the link
+ * header is aliased as a pair of doubles).  Returns 2 if the FIFO was
+ * already full, 1 otherwise; when the skb is fully injected it is
+ * handed to the deferred-free machinery and skb_current_xmit is
+ * cleared, else the partial-progress state is saved for a redrive. */
+static inline int bgcol_xmit_push_packets(struct bg_col* bgcol,
+/* struct bgcol_channel *chn, */
+ unsigned int status_in, unsigned int mioaddr)
+ {
+ unsigned int fragidx ;
+ struct bgnet_dev *bgnet = bgcol->bgnet ;
+ union bgcol_status status;
+ union bgcol_header dest ;
+ struct sk_buff *skb = bgcol->skb_current_xmit ;
+ void *payloadptr = bgcol->current_xmit_data ;
+ int len = bgcol->current_xmit_len ;
+ int fullness ;
+ int initial_fragidx ;
+ /* Link header viewed as two doubles for FPU-assisted injection */
+ double *lnkhdrxd = (double *) &(bgcol->lnkhdr_xmit) ;
+
+ dest = bgcol->dest_xmit ;
+ fragidx = bgcol->fragidx_xmit ;
+ TRACE("bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d, fragidx=%d",
+ bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail, fragidx);
+ dump_skb_partial(skb,64) ;
+ if( 0 != ( ((unsigned)(payloadptr) ) & 0x0f ) )
+ {
+ TRACEN(k_t_request, "Misaligned payloadptr=%p", payloadptr) ;
+ }
+/* BUG_ON(0 != ( ((unsigned)(payloadptr) ) & 0x0f ) ) ; */
+ if( 0 == ( ((unsigned)payloadptr) & 0x0f ) )
+ {
+ /* Have we got space in the FIFO ? */
+ status.raw = status_in ;
+ fullness = status.x.inj_hdr ;
+/* bgcol->send_fifo_histogram[fullness] += 1 ; // fullness statistics */
+ TRACE("bgnet xmit: status=%08x",status.raw);
+ if (fullness >= COL_FIFO_SIZE )
+ {
+ /* No room. Upper routines will retry when appropriate */
+ TRACEN(k_t_irqflow,"Send FIFO full");
+ TRACEN(k_t_irqflow,"bgnet xmit: dst=%08x, src=%08x, ldst=%08x, head=%d, tail=%d, fragidx=%d",
+ bgcol->lnkhdr_xmit.dst_key, bgcol->lnkhdr_xmit.src_key, dest.raw, bgcol->lnkhdr_xmit.opt.opt_net.pad_head, bgcol->lnkhdr_xmit.opt.opt_net.pad_tail, fragidx);
+ return 2 ;
+ }
+ /* update fragment index */
+ bgcol->lnkhdr_xmit.this_pkt = fragidx;
+ initial_fragidx = fragidx ;
+#if defined(COLLECTIVE_ONEPASS_TXRX)
+ if( len >= COL_FRAGPAYLOAD )
+#else
+ while( len >= COL_FRAGPAYLOAD && fullness < COL_FIFO_SIZE)
+#endif
+ {
+ /* First full packet: load header+payload to FP registers, then the
+ * inner loop pipelines store-to-FIFO with load-of-next */
+ bgcol_payload_inject_load2(lnkhdrxd,lnkhdrxd+1, payloadptr) ;
+ dump_bgcol_packet(&bgcol->lnkhdr_xmit, payloadptr) ;
+ fragidx += 1 ;
+ bgcol->lnkhdr_xmit.this_pkt = fragidx;
+ *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) = dest.raw;
+ len -= COL_FRAGPAYLOAD;
+ payloadptr += COL_FRAGPAYLOAD;
+ fullness += 1;
+ while( len >= COL_FRAGPAYLOAD && fullness < COL_FIFO_SIZE)
+ {
+ /* We have full packets, and space in the fifo for them */
+ TRACE("bgcol: ptr=%p, len=%d", payloadptr, len);
+ bgcol_payload_inject_storeload2((void*)(mioaddr + _BGP_TRx_DI),lnkhdrxd,lnkhdrxd+1, payloadptr) ;
+ dump_bgcol_packet(&bgcol->lnkhdr_xmit, payloadptr) ;
+ fragidx += 1 ;
+ bgcol->lnkhdr_xmit.this_pkt = fragidx;
+ /* write destination header */
+ *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) = dest.raw ;
+ len -= COL_FRAGPAYLOAD;
+ payloadptr += COL_FRAGPAYLOAD;
+ fullness += 1;
+ }
+ bgcol_payload_inject_store((void*)(mioaddr + _BGP_TRx_DI)) ;
+#if !defined(COLLECTIVE_ONEPASS_TXRX)
+ status.raw = in_be32_nosync((unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+ fullness = status.x.inj_hdr ;
+#endif
+ }
+ bgnet->stats.tx_bytes += COL_FRAGPAYLOAD*(fragidx-initial_fragidx) ;
+
+ /* Either the FIFO is full, or we are near (or at) the end of the skb-worth of data */
+ /* Stuff one packet in. */
+
+
+ if( len > 0 && fullness < COL_FIFO_SIZE )
+ {
+ /* If the last packet doesn't cross a page boundary, we can send it with */
+ /* whatever is in memory after it, and we won't get a SEGV. */
+ TRACE("bgcol: ptr=%p, len=%d", payloadptr, len);
+ bgnet->stats.tx_bytes += len;
+
+ /* write destination header */
+/* enable_kernel_fp() ; */
+ *(volatile unsigned*)(mioaddr + _BGP_TRx_HI) = dest.raw;
+/* bgcol_payload_inject_load2partial(lnkhdrxd,lnkhdrxd+1, payloadptr,(len+15)/16) ; */
+ bgcol_payload_inject_load2(lnkhdrxd,lnkhdrxd+1, payloadptr) ;
+ bgcol_payload_inject_store((void*)(mioaddr + _BGP_TRx_DI)) ;
+
+ len=0 ;
+ }
+
+ }
+ else
+ {
+ /* The packet was misaligned. This will cause the skb to be flushed and we will get a */
+ /* fresh one next time. */
+ len=0 ;
+ }
+ TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit);
+
+ TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit);
+ /* NOTE(review): if the FIFO filled before the final partial packet,
+ * len is still > 0 here; state is saved below and we return 1, so the
+ * caller must redrive to finish the frame */
+ /* Did we complete the skb ? */
+ if( 0 == len )
+ {
+ /* Yes, we can free this one and upper layers will queue the next one */
+ TRACEN(k_t_irqflow,"bgcol: finished skb=%p", skb);
+ bgnet->stats.tx_packets++;
+ dump_skb_partial(skb,64);
+/* Linux seems unhappy freeing skb's in an interrupt handler */
+#if defined(COLLECTIVE_TRANSMIT_WITH_SLIH)
+#if defined(COLLECTIVE_XMITTER_FREES)
+ skb_queue_tail(&bgcol->skb_list_free,skb) ;
+#else
+ dev_kfree_skb(skb) ;
+#endif
+#else
+ skb_queue_tail(&bgcol->skb_list_free,skb) ;
+ tasklet_schedule(&bgcol_completed_buffer_tasklet) ;
+#endif
+ bgcol->skb_current_xmit=NULL ;
+ }
+ else
+ {
+ /* No, Remember the link header for next time */
+ TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit);
+ TRACE("bgcol: more to go for skb=%p , fragidx=%d, len=%d", skb, fragidx, skb->len);
+ bgcol->fragidx_xmit = fragidx ;
+ bgcol->current_xmit_len=len ;
+ bgcol->current_xmit_data=payloadptr ;
+ }
+ TRACE("bgcol: bgcol->skb_current_xmit=%p", bgcol->skb_current_xmit);
+ return 1 ; /* Indicate that a redrive might be productive */
+ }
+
+
+/* One pass at filling the transmit FIFO. */
+/* Returns 2 if we would like to push something into the fifo but cannot because it is full */
+/* Returns 1 if we pushed something into the fifo (and we would like a redrive because we finished a frame) */
+/* Returns 0 if all the data has been put in the FIFO (and a redrive would be unproductive unless someone queues a frame for sending) */
+/* An upper layer must redrive or enable interrupts if it gets a non-zero. */
+static inline int bgcol_xmit_onepass(struct bg_col *bgcol, unsigned int status_in, unsigned int mioaddr)
+ {
+/* unsigned chnidx = bgcol->bgnet_channel ; */
+ struct sk_buff *skb = bgcol->skb_current_xmit ;
+ if( NULL == skb)
+ {
+ struct bgnet_dev *bgnet = bgcol->bgnet ;
+ if( bgnet)
+ {
+ bgcol_xmit_next_skb(bgcol) ;
+ skb = bgcol->skb_current_xmit ;
+ if( NULL == skb )
+ {
+ TRACEN(k_t_irqflow,"bgcol: no more to send");
+ return 0 ;
+ }
+ }
+ else
+ {
+ TRACEN(k_t_irqflow,"bgcol: bgnet is not ready");
+ return 0 ;
+ }
+ }
+ /* By this stage we should have a viable skb and a viable link header */
+ return bgcol_xmit_push_packets(bgcol,
+ status_in,
+ mioaddr) ;
+ }
+
+/* 'full duplex' SLIH, receiving and sending */
+/* Number of times to spin before concluding there isn't anything on the bgcol */
+/* How many consecutive unproductive receive/transmit passes the duplex
+ * SLIH tolerates before it gives up and re-enables interrupts */
+enum {
+ k_unproductive_receive_threshold = 10 ,
+ k_unproductive_transmit_threshold = 10
+};
+
+/* Second-level (tasklet/NAPI) duplex handler: alternately drains the
+ * receive FIFO and feeds the send FIFO until several passes in a row
+ * make no progress, then re-enables the device interrupts.  'dummy' is
+ * the unused tasklet argument. */
+void bgcol_duplex_slih(unsigned long dummy)
+ {
+ struct bg_col *bgcol = __bgcol ;
+ struct bgcol_channel *chn = &bgcol->chn[bgcol->bgnet_channel];
+ unsigned int mioaddr=chn->mioaddr ;
+ unsigned int status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+ unsigned int rcr ;
+ unsigned int rcx ;
+ unsigned int productive=0 ;
+ unsigned int unproductive_receive_count=0 ;
+ unsigned int unproductive_transmit_count=0 ;
+ unsigned int rcrset = 0 ;
+
+ /* Payloads are moved via FP registers; claim the FPU for kernel use */
+ enable_kernel_fp() ;
+
+#if defined(KEEP_BG_COL_STATISTICS)
+ bgcol->send_fifo_histogram0[(status >> 16) & 0x0f] += 1 ;
+ bgcol->recv_fifo_histogram0[(status ) & 0x0f] += 1 ;
+#endif
+ for(;;)
+ {
+ TRACEN(k_t_irqflow,"status=%08x", status);
+ rcr = bgcol_receive_mark3(bgcol, bgcol->bgnet_channel, status, mioaddr) ;
+#if defined(KEEP_BG_COL_STATISTICS) && defined(EXTRA_TUNING)
+ {
+ unsigned int extra_status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+ bgcol->send_fifo_histogram2[(extra_status >> 16) & 0x0f] += 1 ;
+ bgcol->recv_fifo_histogram2[(extra_status ) & 0x0f] += 1 ;
+
+ }
+#endif
+ rcx = bgcol_xmit_onepass(bgcol, status, mioaddr) ;
+ TRACEN(k_t_irqflow,"rcr=0x%08x rcx=0x%08x", rcr, rcx);
+ status=*((volatile unsigned*)(mioaddr + _BGP_TRx_Sx)) ;
+#if defined(KEEP_BG_COL_STATISTICS)
+ bgcol->send_fifo_histogram1[(status >> 16) & 0x0f] += 1 ;
+ bgcol->recv_fifo_histogram1[(status ) & 0x0f] += 1 ;
+#endif
+ /* What we do now depends on whether the slihs were 'productive' ... */
+ unproductive_receive_count = rcr ? 0 : (unproductive_receive_count+1) ;
+ unproductive_transmit_count = (rcx==1) ? 0 : (unproductive_transmit_count+1) ;
+ productive += ( 0 != rcr || 1 == rcx ) ;
+ /* Accumulate receive return bits since the last productive receive;
+ * bit 31 set means the receiver believes nothing more is coming soon */
+ rcrset = ( rcr > 0 ) ? 0 : rcrset ;
+ rcrset |= rcr ;
+ if( 0 == productive )
+ {
+#if defined(KEEP_BG_COL_STATISTICS)
+ bgcol->spurious_interrupts += 1 ;
+#endif
+ break ; /* a spurious interrupt */
+ }
+ if( ( unproductive_receive_count > k_unproductive_receive_threshold
+ || (rcrset & 0x80000000)
+ )
+ &&
+ ( unproductive_transmit_count > k_unproductive_transmit_threshold
+ || (rcx == 0 )
+ )
+ ) break ; /* Neither transmit nor receive are likely to progress */
+ }
+
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+ if( bgcol_diagnostic_use_napi )
+ {
+ TRACEN(k_t_napi,"napi_complete(%p)",&(bgcol->bgnet->napi)) ;
+ napi_complete(&(bgcol->bgnet->napi)) ;
+ }
+#endif
+ bgcol->handler_running = 0 ;
+ if( 0 != rcx )
+ {
+ /* Filled the TX FIFO, need an interrupt when it has room */
+ TRACEN(k_t_irqflow,"Enabling TX interrupts");
+ bgcol_enable_interrupts_xmit(bgcol) ; /* Ask for an interrupt when there is space */
+ }
+
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+ mod_timer(&bgcol->missed_interrupt_timer, jiffies+200) ; /* Cause timer interrupt after 2000ms if things don't stay alive ... temp while diagnosing problem ... */
+#endif
+ bgcol_enable_interrupts_rcv(bgcol) ;
+ }
+
+
+static DECLARE_TASKLET(bgcol_duplex_slih_tasklet,bgcol_duplex_slih,0);
+
+/* First-level interrupt handler for the collective network: masks
+ * further device interrupts, reads the pending receive-exception flag
+ * DCR (presumably read-to-clear — confirm against the hardware spec),
+ * and defers the real work to bgcol_duplex_slih via NAPI or tasklet. */
+static irqreturn_t bgcol_duplex_interrupt(int irq, void *dev)
+ {
+ struct bg_col *bgcol = (struct bg_col*)dev;
+
+ TRACE("bgnet: (>)interrupt %d", irq);
+ bgcol->handler_running = 1 ;
+ bgcol_disable_interrupts_xmit(bgcol) ;
+ bgcol_disable_interrupts_rcv(bgcol) ;
+ (void) mfdcrx(bgcol->dcrbase +_BGP_DCR_TR_REC_PRXF);
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+ if( bgcol_diagnostic_use_napi)
+ {
+ TRACEN(k_t_napi,"napi_schedule(%p)",&bgcol->bgnet->napi) ;
+ napi_schedule(&bgcol->bgnet->napi) ;
+ }
+ else
+ {
+ tasklet_schedule(&bgcol_duplex_slih_tasklet);
+
+ }
+#else
+ tasklet_schedule(&bgcol_duplex_slih_tasklet);
+#endif
+ TRACE("bgnet: (<)interrupt %d", irq);
+ return IRQ_HANDLED ;
+ }
+
+
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+/* Timer-driven backstop while chasing lost interrupts: mimics the FLIH
+ * (mask interrupts, read the PRXF flag DCR, schedule the SLIH) and
+ * re-arms itself.  NOTE(review): uses &static_col directly where the
+ * other handlers use __bgcol — confirm these refer to the same object. */
+static void bgcol_missed_interrupt(unsigned long dummy)
+{
+ struct bg_col *bgcol = (struct bg_col*)&static_col;
+ TRACEN(k_t_irqflow,"(>)") ;
+
+ bgcol->handler_running = 1 ;
+ bgcol_disable_interrupts_xmit(bgcol) ;
+ bgcol_disable_interrupts_rcv(bgcol) ;
+ (void) mfdcrx(bgcol->dcrbase +_BGP_DCR_TR_REC_PRXF);
+ #if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+ if( bgcol_diagnostic_use_napi)
+ {
+ TRACEN(k_t_napi,"napi_schedule(%p)",&bgcol->bgnet->napi) ;
+ napi_schedule(&bgcol->bgnet->napi) ;
+ }
+ else
+ {
+ tasklet_schedule(&bgcol_duplex_slih_tasklet);
+
+ }
+ #else
+ tasklet_schedule(&bgcol_duplex_slih_tasklet);
+ #endif
+ mod_timer(&bgcol->missed_interrupt_timer, jiffies+10) ; /* Cause timer interrupt after 100ms if things don't stay alive ... temp while diagnosing problem ... */
+ TRACEN(k_t_irqflow,"(<)") ;
+}
+#endif
+int col_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct bg_col *bgcol=__bgcol ;
+ TRACEN(k_t_irqflow|k_t_startxmit,"%s: Enq skb=%p, dev=%p, len=%d", __FUNCTION__, skb, dev, skb->len);
+#if defined(COLLECTIVE_TRANSMIT_WITH_SLIH)
+ skb_queue_tail(&(bgcol->skb_list_xmit),skb) ;
+#if defined(COLLECTIVE_TRANSMIT_WITH_FLIH)
+ if( ! bgcol->handler_running)
+ {
+ TRACEN(k_t_irqflow,"Enabling TX interrupts");
+ bgcol_enable_interrupts_xmit(bgcol) ; /* Ask for an interrupt when there is space */
+ }
+#else
+ tasklet_schedule(&bgcol_duplex_slih_tasklet);
+#endif
+#else
+ {
+ unsigned int flags ;
+
+ dump_skb_partial(skb,64) ;
+ spin_lock_irqsave(&bgcol->irq_lock_xmit, flags) ;
+ {
+ struct sk_buff *xskb = bgcol->skb_current_xmit ;
+ if( NULL == xskb && skb_queue_empty(&(bgcol->skb_list_xmit)))
+ {
+ int rc ;
+ TRACEN(k_t_irqflow,"%s: Enq+en skb=%p, len=%d", __FUNCTION__, skb, skb->len);
+ skb_queue_tail(&(bgcol->skb_list_xmit),skb) ;
+ enable_kernel_fp();
+ rc = bgcol_xmit_handle(bgcol) ;
+ if( 0 == rc )
+ {
+ /* No room in fifo */
+ TRACEN(k_t_irqflow,"Enabling TX interrupts");
+ bgcol_enable_interrupts_xmit(bgcol) ; /* Ask for an interrupt when there is space */
+ }
+ }
+ else
+ {
+ TRACEN(k_t_irqflow,"%s: Enq skb=%p, dev=%p, len=%d", __FUNCTION__, skb, dev, skb->len);
+ skb_queue_tail(&(bgcol->skb_list_xmit),skb) ;
+ }
+ }
+ spin_unlock_irqrestore(&bgcol->irq_lock_xmit, flags);
+ }
+#endif
+/* } */
+#if defined(COLLECTIVE_XMITTER_FREES)
+ {
+ struct sk_buff *skb = skb_dequeue(&(bgcol->skb_list_free) ) ;
+ while(skb)
+ {
+ TRACEN(k_t_irqflow,"Freeing sent skb=%p",skb);
+ dev_kfree_skb(skb) ;
+ skb = skb_dequeue(&(bgcol->skb_list_free) ) ;
+ }
+
+ }
+#endif
+ return 0 ;
+}
+
+
+/* static int bgpnet_add_device(int major, */
+/* int minor, */
+/* const char* devname, */
+/* unsigned long long physaddr, */
+/* int irq, */
+/* irqreturn_t (*irq_handler)(int, void *)) */
+/* { */
+/* int ret; */
+/* dev_t devno; */
+/* struct bgpnet_dev* dev = &bgpnet_devices[bgpnet_num_devices]; */
+/* */
+// /* initialize struct */
+/* init_MUTEX (&dev->sem); */
+/* dev->major = major; */
+/* dev->minor = minor; */
+/* dev->physaddr = physaddr; */
+/* init_waitqueue_head(&dev->read_wq); */
+/* dev->read_complete = 0; */
+/* if (physaddr) { */
+/* dev->regs = ioremap(physaddr, 4096); */
+/* } */
+/* devno=MKDEV(major,minor); */
+/* */
+// /* register i.e., /proc/devices */
+/* ret=register_chrdev_region(devno,1,(char *)devname); */
+/* */
+/* if (ret) */
+/* { */
+/* printk (KERN_WARNING "bgpnet: couldn't register device (%d,%d) register_chrdev_region err=%d\n", */
+/* major,minor,ret); */
+/* return ret; */
+/* } */
+/* */
+// /* add cdev */
+/* cdev_init(&dev->cdev,&bgpnet_device_fops); */
+/* dev->cdev.owner=THIS_MODULE; */
+/* dev->cdev.ops=&bgpnet_device_fops; */
+/* ret=cdev_add(&dev->cdev,devno,1); */
+/* if (ret) */
+/* { */
+/* printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d) cdev_add err=%d\n", */
+/* major,minor,ret); */
+/* return ret; */
+/* } */
+/* */
+// /* signal to pass to owning process, should be altered using ioctl */
+/* dev->signum=-1; */
+/* */
+/* bgpnet_num_devices++; */
+/* */
+/* return 0; */
+/* } */
+
+/* static int bgpnet_device_open (struct inode *inode, struct file *filp) */
+/* { */
+/* struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev); */
+/* */
+/* if(down_interruptible(&dev->sem)) return -ERESTARTSYS; */
+/* up(&dev->sem); */
+/* */
+/* dev->current=current; */
+/* filp->private_data = (void*) dev; */
+/* */
+/* TRACE("bgpnet: device (%d,%d) opened by process \"%s\" pid %i", */
+/* MAJOR(inode->i_rdev), MINOR(inode->i_rdev), current->comm, current->pid); */
+/* */
+/* return 0; */
+/* } */
+
+
+/*
+ * Read doesn't actually read anything. It simply blocks if the fifo is empty.
+ */
+/* static ssize_t bgpnet_device_read(struct file *filp, char __user *buf, size_t count, */
+/* loff_t *f_pos) */
+/* { */
+/* struct bgpnet_dev* dev = (struct bgpnet_dev *)filp->private_data; */
+/* union bgcol_status status; */
+/* int chn = dev->minor; */
+/* */
+/* if (dev->major == BGP_COL_MAJOR_NUM && (chn == 0 || chn == 1)) { */
+/* status.raw = in_be32((unsigned *)((char*)dev->regs + _BGP_TRx_Sx)); */
+/* if (!status.x.rcv_hdr) { */
+/* TRACE("bgpnet: read found status not ready status=0x%08x", status.raw); */
+// /* enable interrupt when packets come in. */
+/* bgcol_enable_rcv_wm_interrupt(&__bgcol->chn[chn]); */
+/* wait_event_interruptible(dev->read_wq, dev->read_complete); */
+/* dev->read_complete = 0; */
+/* TRACE("bgpnet: read wakes up"); */
+/* } */
+// /* Ok if we give a false positive -- we tried.
+/* * Note that we never actually copy out some data. The status might be a useful */
+/* * thing to write in the buffer, but the caller only cares to block until */
+/* * something is there. */
+// */
+/* } */
+/* */
+/* return 0; */
+/* } */
+
+
+/* Don't think this will work on the 'bgnet' channel. What is the intent ? CIOD ? */
+/* If for CIOD, it may have suffered in the 'revised interrupt handler' integration */
+/*
+ * Note that poll only waits for data to be available in the read fifo.
+ * We do this by enabling an interrupt while we wait. The interrupt is disabled
+ * when it fires. The poll may complete before it fires (timeout), but that is ok.
+ */
+/* static unsigned int bgpnet_device_poll(struct file *filp, poll_table * wait) */
+/* { */
+/* struct bgpnet_dev* dev = (struct bgpnet_dev*) filp->private_data; */
+/* unsigned int rc; */
+/* union bgcol_status status; */
+/* unsigned int chn = dev->minor; */
+/* */
+/* if (dev->major == BGP_COL_MAJOR_NUM && (chn == 0 || chn == 1)) { */
+/* poll_wait(filp, &dev->read_wq, wait); */
+/* */
+// /* Return current col status. */
+// rc = POLLOUT|POLLWRNORM; /* For now implement read poll only */
+/* status.raw = in_be32((unsigned *)((char*)dev->regs + _BGP_TRx_Sx)); */
+/* if (status.x.rcv_hdr) { */
+/* TRACE("bgpnet: poll found status ready status=0x%08x", status.raw); */
+// /* got something already */
+/* rc |= POLLIN|POLLRDNORM; */
+/* } else { */
+/* TRACE("bgpnet: poll found status not ready status=0x%08x", status.raw); */
+// /* enable interrupt when packets come in. */
+/* mtdcrx(_BGP_DCR_TR_REC_PRXEN, (chn ? _TR_REC_PRX_WM1 : _TR_REC_PRX_WM0)); */
+/* } */
+/* } else */
+/* rc = POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM; */
+/* */
+/* return rc; */
+/* } */
+
+
+/* static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma) */
+/* { */
+/* unsigned long vsize = vma->vm_end - vma->vm_start; */
+/* struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data; */
+/* int ret = -1; */
+/* */
+// /* ------------------------------------------------------- */
+// /* set up page protection. */
+// /* ------------------------------------------------------- */
+/* */
+/* vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); */
+/* vma->vm_flags |= VM_IO; */
+/* vma->vm_flags |= VM_RESERVED; */
+/* */
+// /* ------------------------------------------------------- */
+// /* do the mapping */
+// /* ------------------------------------------------------- */
+/* */
+/* if (device->physaddr != 0) */
+/* ret = remap_pfn_range(vma, */
+/* vma->vm_start, */
+/* device->physaddr >> PAGE_SHIFT, */
+/* vsize, */
+/* vma->vm_page_prot); */
+/* */
+/* if (ret) { */
+/* printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n", */
+/* device->major, device->minor); */
+/* } else { */
+/* TRACE("bgpnet: mapped (%d,%d) to vm=%lx", */
+/* device->major, device->minor, vma->vm_start); */
+/* } */
+/* return ret? -EAGAIN :0; */
+/* } */
+
+/* ************************************************************************* */
+/* BG/P network: release device */
+/* ************************************************************************* */
+
+/* static int bgpnet_device_release (struct inode *inode, struct file * filp) */
+/* { */
+/* struct bgpnet_dev *dev=(struct bgpnet_dev *)filp->private_data; */
+/* */
+// /*Ensure exclusive access*/
+/* if(down_interruptible(&dev->sem)) return -ERESTARTSYS; */
+/* */
+/* dev->current = NULL; */
+/* up(&dev->sem); */
+/* */
+/* TRACE("bgpnet: device (%d,%d) successfully released", */
+/* MAJOR(inode->i_rdev), MINOR(inode->i_rdev)); */
+/* return 0; */
+/* } */
+
+
+/* static int bgpnet_device_ioctl (struct inode *inode, */
+/* struct file * filp, */
+/* unsigned int cmd, */
+/* unsigned long arg) */
+/* { */
+/* return 0; */
+/* } */
+
+
+
+/* Base 10 is assumed. Hexadecimal numbers must begin with 0x or 0X (ie. 0xabadcafe). */
+/* Binary numbers must begin with the letter b in lowercase (ie. b01101001). */
#define LOWER(c) ((c) < 'a' ? (c) + ('a' - 'A') : (c))
/*
 * Parse an unsigned number from a NUL-terminated string.  Base 10 is
 * assumed; a "0x"/"0X" prefix selects hexadecimal and a leading
 * lowercase 'b' selects binary.  Any malformed character makes the
 * whole result 0 (there is no separate error channel).
 *
 * Fix: digit characters are now classified strictly as '0'-'9',
 * 'a'-'z' or 'A'-'Z'.  The old code pushed every character above '9'
 * through LOWER(), so punctuation aliased onto valid digit values —
 * e.g. ':' (0x3A) became 'Z' and parsed as digit 3, silently accepting
 * garbage like "1:2" as 132.
 */
static inline unsigned long atol(char *str)
{
	unsigned long value = 0;
	unsigned char base = 10;

	if ((*str == '0') && (LOWER(*(str+1)) == 'x')) {
		base = 16;	/* hexadecimal */
		str += 2;
	} else if (*str == 'b') {
		base = 2;	/* binary */
		str++;
	}

	for (; *str; str++) {
		unsigned char c = (unsigned char)*str;
		unsigned char digit;

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'z')
			digit = 10 + (c - 'a');
		else if (c >= 'A' && c <= 'Z')
			digit = 10 + (c - 'A');
		else
			digit = 0xff;	/* not alphanumeric: force rejection below */

		if (digit >= base) {
			value = 0;	/* malformed input yields 0, as before */
			break;
		}
		value = value * base + digit;
	}

	return value;
}
+
+
+
+
+
+/**********************************************************************
+ * Initialization and shut-down
+ **********************************************************************/
+
+/* Reset both sides of a collective channel by writing the reset bit to
+ * the receive and send control DCRs.
+ * NOTE(review): the send-control (SCTRL) write uses _TR_RCTRL_RST;
+ * confirm a distinct _TR_SCTRL_RST value was not intended. */
+static inline void bgcol_reset_channel(struct bgcol_channel *chn)
+{
+ mtdcrx(chn->dcrbase + _BGP_DCR_TR_RCTRL, _TR_RCTRL_RST);
+ mtdcrx(chn->dcrbase + _BGP_DCR_TR_SCTRL, _TR_RCTRL_RST);
+}
+
+
+static int bgcol_init_channel(unsigned long idx, struct bg_col *col)
+{
+ struct bgcol_channel* chn = &col->chn[idx];
+ int i;
+
+ chn->paddr = COL_CHANNEL_PADDR(idx);
+ chn->dcrbase = col->dcrbase + COL_CHANNEL_DCROFF(idx);
+ chn->irq_rcv_pending_mask = COL_IRQ_RCV_PENDING_MASK(idx);
+ chn->irq_inj_pending_mask = COL_IRQ_INJ_PENDING_MASK(idx);
+ init_timer(&chn->inj_timer);
+ chn->inj_timer.function = inj_timeout;
+ chn->inj_timer.data = (unsigned long) col;
+ chn->inj_timer.expires = 0;
+ for (i = 0; i < BGP_MAX_DEVICES; i++)
+ if (bgpnet_devices[i].major == BGP_COL_MAJOR_NUM &&
+ bgpnet_devices[i].minor == idx) {
+ chn->chrdev = &bgpnet_devices[i];
+ break;
+ }
+ if (i >= BGP_MAX_DEVICES)
+ chn->chrdev = NULL;
+ chn->col = col;
+ chn->idx = idx;
+
+ if (!request_mem_region(chn->paddr, _BGP_COL_SIZE, COL_DEV_NAME))
+ return -1;
+
+ chn->mioaddr = (unsigned long)ioremap(chn->paddr, _BGP_COL_SIZE);
+ if (!chn->mioaddr)
+ goto err_remap;
+
+ if (chn)
+ mtdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_VCFG1,
+ _TR_GLOB_VCFG_RWM(0) | _TR_GLOB_VCFG_IWM(4));
+ else
+ mtdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_VCFG0,
+ _TR_GLOB_VCFG_RWM(0) | _TR_GLOB_VCFG_IWM(4));
+ mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_REC_PRXEN, COL_IRQMASK_REC);
+ mtdcrx(chn->col->dcrbase + _BGP_DCR_TR_INJ_PIXEN, COL_IRQMASK_INJ);
+
+ return 0;
+
+ err_remap:
+ printk("error mapping col\n");
+ release_mem_region(chn->mioaddr, _BGP_COL_SIZE);
+
+ return -1;
+}
+
+static int bgcol_uninit_channel(struct bgcol_channel *chn,
+ struct bg_col *col)
+{
+ if (chn->mioaddr)
+ {
+ iounmap((void*)chn->mioaddr);
+ chn->mioaddr = 0;
+
+ /* unconditionally... */
+ release_mem_region(chn->paddr, _BGP_COL_SIZE);
+ }
+ return 0;
+}
+
+/* One-time driver initialisation: allocate the standby receive skbs,
+ * initialise locks and skb queues, claim the DCR and MMIO resources,
+ * set up each channel, clear latched exception flags and finally
+ * attach the IRQ handlers.  Returns 0 on success, -1 on failure with
+ * acquired resources released. */
+static int bgcol_init (struct bg_col *col)
+{
+ int cidx, rc, idx;
+
+/* skb_queue_head_init(&skb_delivery_queue) ; */
+ if( 0 == col->mtu)
+ {
+ bgcol_set_mtu(col,60960+sizeof(struct ethhdr) ) ; /* It's possible that the 'bgnet' might have won a race to set MTU ... */
+ }
+ /* NOTE(review): these alloc_skb() results are not checked; the receive
+ * path appears to tolerate a NULL skb_in_waiting — confirm */
+ col->skb_in_waiting = alloc_skb(
+ k_use_plentiful_skb ? k_plentiful_skb_size : col->mtu
+ , GFP_KERNEL );
+ col->skb_mini = alloc_skb(BGNET_FRAG_MTU + COL_SKB_ALIGN , GFP_KERNEL ) ;
+
+ spin_lock_init(&col->lock);
+ spin_lock_init(&col->irq_lock);
+
+ skb_queue_head_init(&col->skb_list_for_filling) ;
+ skb_queue_head_init(&col->skb_list_for_delivering) ;
+ skb_queue_head_init(&col->skb_list_for_freeing) ;
+
+ /* Pre-stock the receive side with empty skbs */
+ bgcol_prefill(&col->skb_list_for_filling, 100) ;
+
+
+ col->dcrbase = COL_DCR_BASE;
+
+ skb_queue_head_init(&col->skb_list_xmit) ;
+ skb_queue_head_init(&col->skb_list_free) ;
+ col->skb_current_xmit = NULL ;
+
+ skb_queue_head_init(&col->fragskb_list_rcv) ;
+ init_ethkey_table(col) ;
+
+ /* abuse IO port structure for DCRs */
+ if (!request_region(col->dcrbase, COL_DCR_SIZE, COL_DEV_NAME))
+ return -1;
+
+ /* disable device IRQs before we attach them */
+ bgcol_disable_interrupts(col);
+
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+ setup_timer(&col->missed_interrupt_timer,bgcol_missed_interrupt,0) ;
+#endif
+ col->nodeid = mfdcrx(col->dcrbase + _BGP_DCR_TR_GLOB_NADDR);
+
+ for (cidx = 0; cidx < BGP_MAX_CHANNEL; cidx++) {
+ if (bgcol_init_channel(cidx, col) != 0)
+ goto err_channel;
+ }
+
+ /* clear exception flags */
+ mfdcrx(col->dcrbase + _BGP_DCR_TR_INJ_PIXF);
+ mfdcrx(col->dcrbase + _BGP_DCR_TR_REC_PRXF);
+
+ /* allocate IRQs last; otherwise, if an IRQ is still pending, we */
+ /* get kernel segfaults */
+ for (idx = 0; bgcol_irqs[idx].irq != -1; idx++)
+ {
+#if defined(COLLECTIVE_TREE_AFFINITY)
+ bic_set_cpu_for_irq(bgcol_irqs[idx].irq,k_TreeAffinityCPU) ;
+ TRACEN(k_t_general,"setting affinity irq=%d affinity=%d",bgcol_irqs[idx].irq, k_TreeAffinityCPU );
+#endif
+ rc = request_irq(bgcol_irqs[idx].irq, bgcol_irqs[idx].handler,
+ IRQF_DISABLED, bgcol_irqs[idx].name, col);
+ if (rc)
+ goto err_irq_alloc;
+ }
+
+
+ return 0;
+
+ err_irq_alloc:
+ /* NOTE(review): this frees every entry in bgcol_irqs[], including IRQs
+ * that were never successfully requested — confirm this is benign */
+ for (idx = 0; bgcol_irqs[idx].irq != -1; idx++)
+ free_irq(bgcol_irqs[idx].irq, col);
+
+ err_channel:
+ for (cidx = 0; cidx < BGP_MAX_CHANNEL; cidx++)
+ bgcol_uninit_channel(&col->chn[cidx], col);
+
+ release_region(col->dcrbase, COL_DCR_SIZE);
+
+ return -1;
+}
+
+/**********************************************************************
+ * /proc filesystem
+ **********************************************************************/
+
+/* Helpers for the /proc read handlers below.  Both expand inside a
+ * function that declares 'rc', 'remaining' and 'page' and provides an
+ * 'out:' label; they append one formatted line to the page buffer,
+ * advance 'page', and bail out to 'out' on snprintf error or when the
+ * buffer is full.  TGREAD prints a named DCR register and its value. */
+#define TGREAD(r, d) \
+ rc = snprintf(page, remaining, "%.30s (%03x): %08x\n", d, \
+ bgcol->dcrbase + r, mfdcrx(bgcol->dcrbase + r)); \
+ if (rc < 0) goto out; \
+ if (rc > remaining) { remaining = 0; goto out; } \
+ page += rc; \
+ remaining -= rc;
+
+/* Print an arbitrary expression as 'stringified-name : value' */
+#define TGSHOW(r) \
+ rc = snprintf(page, remaining, "%.60s : %08x\n", #r, (unsigned int)(r) );\
+ if (rc < 0) goto out; \
+ if (rc > remaining) { remaining = 0; goto out; } \
+ page += rc; \
+ remaining -= rc;
+
+
+/*
+ * /proc "statistics" read handler (legacy read_proc interface).
+ *
+ * Dumps the collective device's exception-enable/flag DCRs plus the
+ * driver's software state (channel-0 status word, queue depths, receive
+ * counters, raw genirq bookkeeping) into 'page' and returns the number
+ * of bytes produced.  TGREAD/TGSHOW consume page/remaining/rc from this
+ * scope and jump to 'out:' when the page fills, so output is silently
+ * cut at one page.
+ */
+static int bgpnet_statistics_read (char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+	struct bg_col *bgcol = data;
+	int rc, remaining = count;
+	/* single-shot read: everything fits in (at most) one page */
+	*eof = 1;
+	TGREAD(_BGP_DCR_TR_REC_PRXEN, "Receive Exception Enable");
+	TGREAD(_BGP_DCR_TR_REC_PRXF, "Receive Exception Flag    ");
+	TGREAD(_BGP_DCR_TR_INJ_PIXEN, "Injection Exception Enable");
+	TGREAD(_BGP_DCR_TR_INJ_PIXF, "Injection Exception Flag    ");
+
+	/* raw channel-0 status register, read through the memory mapping */
+	TGSHOW(*((unsigned*)(bgcol->chn[0].mioaddr + _BGP_TRx_Sx))) ;
+	TGSHOW(bgcol->curr_conn) ;
+#if !defined(COLLECTIVE_TRANSMIT_WITH_SLIH)
+	TGSHOW(spin_is_locked(&bgcol->irq_lock_xmit)) ;
+#endif
+	/* transmit/receive queue depths and partial-frame transmit state */
+	TGSHOW(skb_queue_len(&bgcol->skb_list_xmit)) ;
+	TGSHOW(skb_queue_len(&bgcol->skb_list_free)) ;
+	TGSHOW(skb_queue_len(&bgcol->fragskb_list_rcv)) ;
+	TGSHOW(bgcol->skb_current_xmit) ;
+	TGSHOW(bgcol->current_xmit_len) ;
+	TGSHOW(bgcol->fragidx_xmit) ;
+	TGSHOW(bgcol->recv_total) ;
+	TGSHOW(bgcol->recv_guess_miss) ;
+	TGSHOW(bgcol->recv_no_skbuff) ;
+	TGSHOW(bgcol->recv_no_first_packet) ;
+	TGSHOW(bgcol->spurious_interrupts) ;
+	/* peek directly at the genirq descriptors for the inj/rcv IRQs */
+	TGSHOW(irq_desc[BG_COL_IRQ_INJ].status) ;
+	TGSHOW(irq_desc[BG_COL_IRQ_INJ].irq_count) ;
+	TGSHOW(irq_desc[BG_COL_IRQ_INJ].irqs_unhandled) ;
+	TGSHOW(irq_desc[BG_COL_IRQ_RCV].status) ;
+	TGSHOW(irq_desc[BG_COL_IRQ_RCV].irq_count) ;
+	TGSHOW(irq_desc[BG_COL_IRQ_RCV].irqs_unhandled) ;
+
+ out:
+
+	return count - remaining;
+}
+
+/*
+ * /proc "status" read handler (legacy read_proc interface).
+ *
+ * Dumps essentially every readable collective-device DCR (global, class
+ * routing, SRAM diagnostics, per-channel link error counters, arbiter)
+ * followed by the two channels' memory-mapped status words and the
+ * driver's receive counters.  Fills 'page', returns bytes produced;
+ * output is silently cut at one page via the 'out:' label.
+ *
+ * NOTE(review): the hand-rolled truncation checks below use
+ * "rc > remaining", which misses the rc == remaining case (snprintf
+ * returns the untruncated length); "rc >= remaining" would be exact.
+ * Harmless here (no overflow, one byte of one line lost at worst).
+ */
+static int bgpnet_status_read (char *page, char **start, off_t off,
+                               int count, int *eof, void *data)
+{
+	struct bg_col *bgcol = data;
+	int rc, remaining = count;
+	*eof = 1;  /* single-shot read */
+
+
+	TGREAD(_BGP_DCR_TR_GLOB_FPTR, "Fifo Pointer");
+	TGREAD(_BGP_DCR_TR_GLOB_NADDR, "Node Address");
+	TGREAD(_BGP_DCR_TR_GLOB_VCFG0, "VC0 Configuration");
+	TGREAD(_BGP_DCR_TR_GLOB_VCFG1, "VC1 Configuration");
+	TGREAD(_BGP_DCR_TR_REC_PRXEN, "Receive Exception Enable");
+	TGREAD(_BGP_DCR_TR_REC_PRXF, "Receive Exception Flag    ");
+	TGREAD(_BGP_DCR_TR_REC_PRDA, "Receive Diagnostic Address");
+	TGREAD(_BGP_DCR_TR_REC_PRDD, "Receive Diagnostic Data");
+	TGREAD(_BGP_DCR_TR_INJ_PIXEN, "Injection Exception Enable");
+	TGREAD(_BGP_DCR_TR_INJ_PIXF, "Injection Exception Flag    ");
+	TGREAD(_BGP_DCR_TR_INJ_PIDA, "Injection Diagnostic Address");
+	TGREAD(_BGP_DCR_TR_INJ_PIDD, "Injection Diagnostic Data");
+	TGREAD(_BGP_DCR_TR_INJ_CSPY0, "VC0 payload checksum");
+	TGREAD(_BGP_DCR_TR_INJ_CSHD0, "VC0 header checksum");
+	TGREAD(_BGP_DCR_TR_INJ_CSPY1, "VC1 payload checksum");
+	TGREAD(_BGP_DCR_TR_INJ_CSHD1, "VC1 header checksum");
+
+	/* class-routing descriptors and idle pattern */
+	TGREAD(_BGP_DCR_TR_CLASS_RDR0, "Route Desc 0, 1");
+	TGREAD(_BGP_DCR_TR_CLASS_RDR1, "Route Desc 2, 3");
+	TGREAD(_BGP_DCR_TR_CLASS_RDR2, "Route Desc 4, 5");
+	TGREAD(_BGP_DCR_TR_CLASS_RDR3, "Route Desc 6, 7");
+	TGREAD(_BGP_DCR_TR_CLASS_RDR4, "Route Desc 8, 9");
+	TGREAD(_BGP_DCR_TR_CLASS_RDR5, "Route Desc 10, 11");
+	TGREAD(_BGP_DCR_TR_CLASS_RDR6, "Route Desc 12, 13");
+	TGREAD(_BGP_DCR_TR_CLASS_RDR7, "Route Desc 14, 15");
+	TGREAD(_BGP_DCR_TR_CLASS_ISRA, "Idle pattern low");
+	TGREAD(_BGP_DCR_TR_CLASS_ISRB, "Idle pattern high");
+
+	TGREAD(_BGP_DCR_TR_DMA_DMAA, "SRAM diagnostic addr");
+	TGREAD(_BGP_DCR_TR_DMA_DMAD, "SRAM diagnostic data");
+	TGREAD(_BGP_DCR_TR_DMA_DMADI, "SRAM diagnostic data inc");
+	TGREAD(_BGP_DCR_TR_DMA_DMAH, "SRAM diagnostic header");
+
+	/* per-channel link health counters */
+	TGREAD(_BGP_DCR_TR_ERR_R0_CRC, "CH0: Receiver link CRC errors");
+	TGREAD(_BGP_DCR_TR_ERR_R0_CE, "CH0: Receiver SRAM errors corrected");
+	TGREAD(_BGP_DCR_TR_ERR_S0_RETRY, "CH0: Sender link retransmissions");
+	TGREAD(_BGP_DCR_TR_ERR_S0_CE, "CH0: Sender SRAM errors corrected");
+
+	TGREAD(_BGP_DCR_TR_ERR_R1_CRC, "CH1: Receiver link CRC errors");
+	TGREAD(_BGP_DCR_TR_ERR_R1_CE, "CH1: Receiver SRAM errors corrected");
+	TGREAD(_BGP_DCR_TR_ERR_S1_RETRY, "CH1: Sender link retransmissions");
+	TGREAD(_BGP_DCR_TR_ERR_S1_CE, "CH1: Sender SRAM errors corrected");
+
+	TGREAD(_BGP_DCR_TR_ERR_R2_CRC, "CH2: Receiver link CRC errors");
+	TGREAD(_BGP_DCR_TR_ERR_R2_CE, "CH2: Receiver SRAM errors corrected");
+	TGREAD(_BGP_DCR_TR_ERR_S2_RETRY, "CH2: Sender link retransmissions");
+	TGREAD(_BGP_DCR_TR_ERR_S2_CE, "CH2: Sender SRAM errors corrected");
+
+	TGREAD(_BGP_DCR_TR_ARB_RCFG, "ARB: General router config");
+	TGREAD(_BGP_DCR_TR_ARB_RSTAT, "ARB: General router status");
+	TGREAD(_BGP_DCR_TR_ARB_HD00, "ARB: Next hdr, CH0, VC0");
+	TGREAD(_BGP_DCR_TR_ARB_HD01, "ARB: Next hdr, CH0, VC1");
+	TGREAD(_BGP_DCR_TR_ARB_HD10, "ARB: Next hdr, CH1, VC0");
+	TGREAD(_BGP_DCR_TR_ARB_HD11, "ARB: Next hdr, CH1, VC1");
+	TGREAD(_BGP_DCR_TR_ARB_HD20, "ARB: Next hdr, CH2, VC0");
+	TGREAD(_BGP_DCR_TR_ARB_HD21, "ARB: Next hdr, CH2, VC1");
+
+	/* memory-mapped channel status words (not reachable via TGREAD) */
+	rc = snprintf(page, remaining, "CH0: status=%08x\n",
+		      in_be32((unsigned*)(bgcol->chn[0].mioaddr + _BGP_TRx_Sx)));
+	if (rc < 0) goto out;
+	if (rc > remaining) { remaining = 0; goto out; }
+	page += rc; remaining -= rc;
+
+	rc = snprintf(page, remaining, "CH1: status=%08x\n",
+		      in_be32((unsigned*)(bgcol->chn[1].mioaddr + _BGP_TRx_Sx)));
+	if (rc < 0) goto out;
+	if (rc > remaining) { remaining = 0; goto out; }
+	page += rc; remaining -= rc;
+
+	rc = snprintf(page, remaining, "Data placement total=%d guess wrong=%d\n",
+		      bgcol->recv_total, bgcol->recv_guess_miss) ;
+	if (rc < 0) goto out;
+	if (rc > remaining) { remaining = 0; goto out; }
+	page += rc; remaining -= rc;
+	rc = snprintf(page,remaining, "Receive no_skbuff=%d no_first_packet=%d\n",
+		      bgcol->recv_no_skbuff, bgcol->recv_no_first_packet) ;
+	if (rc < 0) goto out;
+	if (rc > remaining) { remaining = 0; goto out; }
+	page += rc; remaining -= rc;
+
+#if defined(KEEP_BG_COL_STATISTICS)
+	{
+	/* FIFO-depth histogram dumps, currently disabled; kept for reference */
+/* 	int x ; */
+/* 	for( x=0; x<=COL_FIFO_SIZE;x+=1) */
+/* 	  { */
+/* 	    rc = snprintf(page, remaining, "sf_h0[%d]=%d\n", x, bgcol->send_fifo_histogram0[x]) ; */
+/* 	    if (rc < 0) goto out; */
+/* 	    if (rc > remaining) { remaining = 0; goto out; } */
+/* 	    page += rc; remaining -= rc; */
+/* 	  } */
+/* 	for( x=0; x<=COL_FIFO_SIZE;x+=1) */
+/* 	  { */
+/* 	    rc = snprintf(page, remaining, "sf_h1[%d]=%d\n", x, bgcol->send_fifo_histogram1[x]) ; */
+/* 	    if (rc < 0) goto out; */
+/* 	    if (rc > remaining) { remaining = 0; goto out; } */
+/* 	    page += rc; remaining -= rc; */
+/* 	  } */
+/* 	for( x=0; x<=COL_FIFO_SIZE;x+=1) */
+/* 	  { */
+/* 	    rc = snprintf(page, remaining, "rf_h0[%d]=%d\n", x, bgcol->recv_fifo_histogram0[x]) ; */
+/* 	    if (rc < 0) goto out; */
+/* 	    if (rc > remaining) { remaining = 0; goto out; } */
+/* 	    page += rc; remaining -= rc; */
+/* 	  } */
+/* 	for( x=0; x<=COL_FIFO_SIZE;x+=1) */
+/* 	  { */
+/* 	    rc = snprintf(page, remaining, "rf_h1[%d]=%d\n", x, bgcol->recv_fifo_histogram1[x]) ; */
+/* 	    if (rc < 0) goto out; */
+/* 	    if (rc > remaining) { remaining = 0; goto out; } */
+/* 	    page += rc; remaining -= rc; */
+/* 	  } */
+	rc=snprintf(page, remaining, "spurious interrupts=%d\n", bgcol->spurious_interrupts) ;
+	if (rc < 0) goto out;
+	if (rc > remaining) { remaining = 0; goto out; }
+	page += rc; remaining -= rc;
+	}
+#endif
+
+ out:
+
+	return count - remaining;
+}
+
+
+static int bgcol_proc_write(struct file *filp, const char __user *buff, unsigned long len, void *data)
+ {
+ char proc_write_buffer[256] ;
+ unsigned long actual_len=(len<255) ? len : 255 ;
+ int rc = copy_from_user( proc_write_buffer, buff, actual_len ) ;
+ if( rc != 0 ) return -EFAULT ;
+ proc_write_buffer[actual_len] = 0 ;
+ return actual_len ;
+ }
+
+/* static unsigned char xtable[256] = */
+/* { */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, */
+/* }; */
+/* */
+/* static int bgcol_atoix(const unsigned char *cp) */
+/* { */
+/* int result = 0 ; */
+/* unsigned char ecp = xtable[*cp] ; */
+/* while (ecp < 0x10) */
+/* { */
+/* result = (result << 4 ) | ecp ; */
+/* cp += 1 ; */
+/* ecp = xtable[*cp] ; */
+/* } */
+/* return result ; */
+/* } */
+
+/* Scratch cell shared by every "coldcr" sysctl entry; refreshed from the
+ * hardware DCR on each access.  Unlocked: concurrent accesses to two DCR
+ * entries can race on this single copy. */
+static int dcrcopy ;
+/* sysctl handler for DCR-backed entries: snapshot the DCR whose number is
+ * carried in ctl->extra1 into dcrcopy, then let proc_dointvec format (read)
+ * or parse (write) it as a plain int.
+ * NOTE(review): on the write path the user's value only lands in dcrcopy;
+ * there is no mtdcrx() write-back, so despite mode 0644 these entries look
+ * effectively read-only -- confirm whether that is intended. */
+static int proc_docoldcr(struct ctl_table *ctl, int write, struct file * filp,
+	       void __user *buffer, size_t *lenp, loff_t *ppos)
+  {
+	  int rc ;
+	  TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+
+	  dcrcopy=mfdcrx((unsigned int)(ctl->extra1)) ;
+	  rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+	  TRACE("(<)") ;
+	  return rc ;
+  }
+
+/* sysctl handler for channel-0 memory-mapped registers: point ctl->data at
+ * the MMIO word (channel base + offset from ctl->extra1) and hand off to
+ * proc_dointvec, which then reads/writes the device register directly. */
+static int proc_docolmio_0(struct ctl_table *ctl, int write, struct file * filp,
+	       void __user *buffer, size_t *lenp, loff_t *ppos)
+  {
+	  int rc ;
+	  TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+	  ctl->data=(unsigned*)(static_col.chn[0].mioaddr + (unsigned int)(ctl->extra1)) ;
+	  rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+	  TRACE("(<)") ;
+	  return rc ;
+  }
+
+/* Channel-1 twin of proc_docolmio_0; identical except for the channel. */
+static int proc_docolmio_1(struct ctl_table *ctl, int write, struct file * filp,
+	       void __user *buffer, size_t *lenp, loff_t *ppos)
+  {
+	  int rc ;
+	  TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+	  ctl->data=(unsigned*)(static_col.chn[1].mioaddr + (unsigned int)(ctl->extra1)) ;
+	  rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+	  TRACE("(<)") ;
+	  return rc ;
+  }
+
+/* sysctl directory path: entries below appear under
+ * /proc/sys/bgp/collective/ (ctl_name 0 = no binary-sysctl number). */
+static struct ctl_path bgp_col_ctl_path[] = {
+	{ .procname = "bgp", .ctl_name = 0, },
+	{ .procname = "collective", .ctl_name = 0, },
+	{ },
+};
+
+/* Generators for struct ctl_table entries (all mode 0644, int-sized):
+ *  CTL_PARAM_ADDR   - plain int variable at a fixed kernel address;
+ *  CTL_PARAM_MIO_0/1 - channel-0/1 MMIO register at the given offset,
+ *                      resolved lazily by the proc_docolmio_* handler
+ *                      (the channel base is not known at table-build time);
+ *  CTL_PARAM_COLDCR - collective DCR, snapshotted into 'dcrcopy' by
+ *                      proc_docoldcr on each access. */
+#define CTL_PARAM_ADDR(Name,Addr)  \
+	{ \
+		.ctl_name	= CTL_UNNUMBERED, \
+		.procname	= Name , \
+		.data		= (int *)Addr, \
+		.maxlen		= sizeof(int), \
+		.mode		= 0644, \
+		.proc_handler	= &proc_dointvec \
+	}
+
+#define CTL_PARAM_MIO_0(Name,Offset)  \
+	{ \
+		.ctl_name	= CTL_UNNUMBERED, \
+		.procname	= Name , \
+		.maxlen		= sizeof(int), \
+		.mode		= 0644, \
+		.proc_handler	= &proc_docolmio_0, \
+		.extra1		= (void *)Offset \
+	}
+
+#define CTL_PARAM_MIO_1(Name,Offset)  \
+	{ \
+		.ctl_name	= CTL_UNNUMBERED, \
+		.procname	= Name , \
+		.maxlen		= sizeof(int), \
+		.mode		= 0644, \
+		.proc_handler	= &proc_docolmio_1, \
+		.extra1		= (void *)Offset \
+	}
+
+#define CTL_PARAM_COLDCR(Name,DCRNumber)  \
+	{ \
+		.ctl_name	= CTL_UNNUMBERED, \
+		.procname	= Name , \
+		.data		= &dcrcopy , \
+		.maxlen		= sizeof(int), \
+		.mode		= 0644, \
+		.proc_handler	= &proc_docoldcr , \
+		.extra1		= (void *) DCRNumber \
+	}
+
+/* Kernel page size, exported read-mostly for diagnostics scripts. */
+static unsigned int static_pagesize = 1 << PAGE_SHIFT ;
+
+/* Tunables and counters exposed under /proc/sys/bgp/collective/.
+ * NOTE(review): the histogram entries advertise maxlen of
+ * COL_FIFO_SIZE (8) ints, but the arrays in struct bg_col are declared
+ * with 16 elements -- confirm which size is intended. */
+static struct ctl_table bgp_col_ctl_table[] = {
+/* 		CTL_PARAM_ADDR("napi",&bgcol_diagnostic_use_napi) , */
+		CTL_PARAM_ADDR("pagesize",&static_pagesize) ,
+		CTL_PARAM_ADDR("tracemask",&bgcol_debug_tracemask) ,
+/* 		CTL_PARAM_ADDR("e10000_diag_count",&e10000_diag_count) , */
+		CTL_PARAM_COLDCR("Receive-Exception-Enable",_BGP_DCR_TR_REC_PRXEN),
+		CTL_PARAM_COLDCR("Receive-Exception-Flag",_BGP_DCR_TR_REC_PRXF),
+		CTL_PARAM_COLDCR("Injection-Exception-Enable",_BGP_DCR_TR_INJ_PIXEN),
+		CTL_PARAM_COLDCR("Injection-Exception-Flag	",_BGP_DCR_TR_INJ_PIXF),
+		CTL_PARAM_MIO_0("BGP_TR0_S0",_BGP_TRx_Sx) ,
+		CTL_PARAM_MIO_1("BGP_TR1_S1",_BGP_TRx_Sx) ,
+		/* live driver state, exposed for debugging */
+		CTL_PARAM_ADDR("curr_conn",&static_col.curr_conn) ,
+		CTL_PARAM_ADDR("current_xmit_len",&static_col.current_xmit_len) ,
+		CTL_PARAM_ADDR("fragidx_xmit",&static_col.fragidx_xmit) ,
+		CTL_PARAM_ADDR("recv_total",&static_col.recv_total) ,
+		CTL_PARAM_ADDR("recv_guess_miss",&static_col.recv_guess_miss) ,
+		CTL_PARAM_ADDR("recv_no_skbuff",&static_col.recv_no_skbuff) ,
+		CTL_PARAM_ADDR("recv_no_first_packet",&static_col.recv_no_first_packet) ,
+		CTL_PARAM_ADDR("deliver_without_workqueue",&static_col.deliver_without_workqueue) ,
+#if defined(KEEP_BG_COL_STATISTICS)
+		/* FIFO-occupancy histograms, dumped as raw int vectors */
+		{
+			.ctl_name	= CTL_UNNUMBERED,
+			.procname	= "sf_h0" ,
+			.data		= static_col.send_fifo_histogram0,
+			.maxlen		= COL_FIFO_SIZE*sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec
+		} ,
+		{
+			.ctl_name	= CTL_UNNUMBERED,
+			.procname	= "sf_h1" ,
+			.data		= static_col.send_fifo_histogram1,
+			.maxlen		= COL_FIFO_SIZE*sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec
+		} ,
+		{
+			.ctl_name	= CTL_UNNUMBERED,
+			.procname	= "rf_h0" ,
+			.data		= static_col.recv_fifo_histogram0,
+			.maxlen		= COL_FIFO_SIZE*sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec
+		} ,
+		{
+			.ctl_name	= CTL_UNNUMBERED,
+			.procname	= "rf_h1" ,
+			.data		= static_col.recv_fifo_histogram1,
+			.maxlen		= COL_FIFO_SIZE*sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec
+		} ,
+#if defined(EXTRA_TUNING)
+		{
+			.ctl_name	= CTL_UNNUMBERED,
+			.procname	= "sf_h2" ,
+			.data		= static_col.send_fifo_histogram2,
+			.maxlen		= COL_FIFO_SIZE*sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec
+		} ,
+		{
+			.ctl_name	= CTL_UNNUMBERED,
+			.procname	= "rf_h2" ,
+			.data		= static_col.recv_fifo_histogram2,
+			.maxlen		= COL_FIFO_SIZE*sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= &proc_dointvec
+		} ,
+
+#endif
+#endif
+		{ 0 }		/* terminator sentinel */
+
+
+} ;
+
+static void register_collective_sysctl(struct bg_col *col)
+{
+ col->sysctl_table_header=register_sysctl_paths(bgp_col_ctl_path,bgp_col_ctl_table) ;
+ TRACEN(k_t_init, "sysctl_table_header=%p",col->sysctl_table_header) ;
+
+}
+
+int __init
+bgcol_module_init(void)
+{
+ struct bg_col *col = &static_col ;
+ int rc;
+ unsigned long long tr0, tr1, ts0, ts1;
+
+ register_collective_sysctl(&static_col) ;
+
+ tr0=((unsigned long long)_BGP_UA_COL0<<32) + _BGP_PA_COL0;
+ tr1=((unsigned long long)_BGP_UA_COL1<<32) + _BGP_PA_COL1;
+ ts0=((unsigned long long)_BGP_UA_TORUS0<<32) + _BGP_PA_TORUS0;
+ ts1=((unsigned long long)_BGP_UA_TORUS1<<32) + _BGP_PA_TORUS1;
+
+#if defined(KEEP_BG_COL_STATISTICS) || defined(BGP_COL_STATUS_VISIBILITY)
+ bgpnetDir = proc_mkdir("bgpcol", NULL);
+ if (bgpnetDir) {
+#if defined(KEEP_BG_COL_STATISTICS)
+ statisticsEntry = create_proc_entry("statistics", S_IRUGO, bgpnetDir);
+ if (statisticsEntry) {
+ statisticsEntry->nlink = 1;
+ statisticsEntry->read_proc = (void*) bgpnet_statistics_read;
+ statisticsEntry->write_proc = (void*) bgcol_proc_write;
+ statisticsEntry->data = col ;
+ }
+#endif
+#if defined(BGP_COL_STATUS_VISIBILITY)
+ statusEntry = create_proc_entry("status", S_IRUGO, bgpnetDir);
+ if (statusEntry) {
+ statusEntry->nlink = 1;
+ statusEntry->read_proc = (void*) bgpnet_status_read;
+ statusEntry->write_proc = (void*) bgcol_proc_write;
+ statusEntry->data = col ;
+ }
+#endif
+/* #if defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE) */
+/* tracemaskEntry = create_proc_entry("tracemask", S_IRUGO, bgpnetDir); */
+/* if (tracemaskEntry) { */
+/* tracemaskEntry->nlink = 1; */
+/* tracemaskEntry->read_proc = (void*) bgpnet_tracemask_read; */
+/* tracemaskEntry->write_proc = (void*) bgpnet_tracemask_write; */
+/* } */
+/* #endif */
+ }
+#endif
+
+ rc = bgcol_init(col);
+ if (rc)
+ goto err_col_init;
+
+ mb();
+
+
+ return 0;
+
+ err_col_init:
+ /* XXX: unmap IRQs */
+ return rc;
+}
diff --git a/drivers/net/bgp_collective/bgcol.h b/drivers/net/bgp_collective/bgcol.h
new file mode 100644
index 00000000000000..1ea2b7638cb7ac
--- /dev/null
+++ b/drivers/net/bgp_collective/bgcol.h
@@ -0,0 +1,285 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ * Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * Description: Header file for col device
+ *
+ *
+ ********************************************************************/
+#ifndef __DRIVERS__NET__BLUEGENE__COL_H__
+#define __DRIVERS__NET__BLUEGENE__COL_H__
+
+/* Build-time feature switches for the collective driver. */
+#define KEEP_BG_COL_STATISTICS		/* keep FIFO histograms / counters */
+#define EXTRA_TUNING			/* extra histogram channels */
+/* #define KEEP_RECV_TOTAL */
+#define HAS_MISSED_INTERRUPT_TIMER	/* poll timer to catch lost IRQs */
+
+/* Physical placement of the collective and torus register windows
+ * (36-bit Blue Gene/P physical addresses). */
+#define _BGP_COL_BASE		(0x610000000ULL)
+#define _BGP_COL_OFFSET		(0x001000000ULL)	/* per-channel stride */
+#define _BGP_COL_SIZE		(0x400)
+
+#define _BGP_TORUS_BASE		(0x601140000ULL)
+#define _BGP_TORUS_OFFSET	(0x000010000ULL)
+
+#define BGP_MAX_CHANNEL		2	/* collective channels per node */
+#define BGP_COL_CHANNEL		0	/* channel used by the net driver */
+#define BGP_COL_ADDR_BITS	24	/* width of a collective node address */
+
+#define COL_CHANNEL_PADDR(c)	(_BGP_COL_BASE + ((c)*_BGP_COL_OFFSET))
+#define COL_CHANNEL_DCROFF(c)	(0x20 + ((c) * 8))
+#define COL_DCR_BASE		(0xc00)	/* first collective DCR number */
+#define COL_DCR_SIZE		(0x80)	/* DCR range claimed by the driver */
+
+/* Injection-side exception causes the driver wants interrupts for. */
+#define COL_IRQMASK_INJ	(_TR_INJ_PIX_APAR0 | _TR_INJ_PIX_APAR1 |\
+			 _TR_INJ_PIX_ALIGN0 | _TR_INJ_PIX_ALIGN1 |\
+			 _TR_INJ_PIX_ADDR0 | _TR_INJ_PIX_ADDR1 |\
+			 _TR_INJ_PIX_DPAR0 | _TR_INJ_PIX_DPAR1 |\
+			 _TR_INJ_PIX_COLL | _TR_INJ_PIX_UE |\
+			 _TR_INJ_PIX_PFO0 | _TR_INJ_PIX_PFO1 |\
+			 _TR_INJ_PIX_HFO0 | _TR_INJ_PIX_HFO1)
+
+/* Reception-side exception causes, including the watermark interrupts. */
+#define COL_IRQMASK_REC	(_TR_REC_PRX_APAR0 | _TR_REC_PRX_APAR1 |\
+			 _TR_REC_PRX_ALIGN0 | _TR_REC_PRX_ALIGN1 |\
+			 _TR_REC_PRX_ADDR0 | _TR_REC_PRX_ADDR1 |\
+			 _TR_REC_PRX_COLL | _TR_REC_PRX_UE |\
+			 _TR_REC_PRX_PFU0 | _TR_REC_PRX_PFU1 |\
+			 _TR_REC_PRX_HFU0 | _TR_REC_PRX_HFU1 |\
+			 _TR_REC_PRX_WM0 | _TR_REC_PRX_WM1 )
+
+/* Per-channel pending bits: channel 0 sits in the higher bit position. */
+#define COL_IRQ_RCV_PENDING_MASK(idx)  (1U << (1 - idx))
+#define COL_IRQ_INJ_PENDING_MASK(idx)  (1U << (2 - idx))
+
+
+#define COL_IRQ_GROUP		5
+#define COL_IRQ_BASE		20
+#define COL_IRQ_NONCRIT_NUM	20
+#define COL_NONCRIT_BASE	0
+#define COL_FIFO_SIZE		8	/* hardware FIFO depth, in packets */
+
+
+/* Hardware packet header for the collective network, in point-to-point
+ * and broadcast flavours overlaid on one 32-bit word.
+ * NOTE(review): the bitfield ordering matches the register layout only
+ * for big-endian bit allocation (PowerPC) -- not portable as written. */
+union bgcol_header {
+	unsigned int raw;
+	struct {
+		unsigned int pclass	: 4;	/* routing class */
+		unsigned int p2p	: 1;	/* 1 = point-to-point */
+		unsigned int irq	: 1;	/* raise IRQ at receiver */
+		unsigned vector		: 24;	/* destination node address */
+		unsigned int csum_mode	: 2;
+	} p2p;
+	struct {
+		unsigned int pclass	: 4;
+		unsigned int p2p	: 1;	/* 0 = broadcast/reduce */
+		unsigned int irq	: 1;
+		unsigned int op		: 3;	/* combine operation */
+		unsigned int opsize	: 7;	/* operand size */
+		unsigned int tag	: 14;
+		unsigned int csum_mode	: 2;
+	} bcast;
+} __attribute__((packed));
+
+/* Channel status register (_BGP_TRx_Sx): injection and reception FIFO
+ * occupancy in packets/quadwords/headers, plus the IRQ-pending bit.
+ * The bgcol_status_* shift/mask helpers below decode the same word
+ * without relying on bitfield layout. */
+union bgcol_status {
+	unsigned int raw;
+	struct {
+		unsigned int inj_pkt	: 4;	/* payload FIFO, packets */
+		unsigned int inj_qwords	: 4;	/* partial packet, quadwords */
+		unsigned int __res0	: 4;
+		unsigned int inj_hdr	: 4;	/* header FIFO entries */
+		unsigned int rcv_pkt	: 4;
+		unsigned int rcv_qwords	: 4;
+		unsigned int __res1	: 3;
+		unsigned int irq	: 1;	/* interrupt pending */
+		unsigned int rcv_hdr	: 4;
+	} x;
+} __attribute__((packed));
+
/*
 * Field extractors for the raw 32-bit channel status word (see union
 * bgcol_status).  Nibbles, most significant first: inj_pkt, inj_qwords,
 * <reserved>, inj_hdr, rcv_pkt, rcv_qwords, <reserved>+irq, rcv_hdr.
 */
static inline unsigned int bgcol_status_inj_pkt(unsigned int status)
{
	return (status >> 28) & 0x0fu;
}

static inline unsigned int bgcol_status_inj_qwords(unsigned int status)
{
	return (status >> 24) & 0x0fu;
}

static inline unsigned int bgcol_status_inj_hdr(unsigned int status)
{
	return (status >> 16) & 0x0fu;
}

static inline unsigned int bgcol_status_rcv_pkt(unsigned int status)
{
	return (status >> 12) & 0x0fu;
}

static inline unsigned int bgcol_status_rcv_qwords(unsigned int status)
{
	return (status >> 8) & 0x0fu;
}

static inline unsigned int bgcol_status_irq(unsigned int status)
{
	return (status >> 4) & 0x01u;
}

static inline unsigned int bgcol_status_rcv_hdr(unsigned int status)
{
	return status & 0x0fu;
}
+
+
+/* some device defined */
+/* Offsets of the per-channel control/status DCRs relative to a channel's
+ * DCR base (derived from the channel-0 absolute numbers). */
+#define _BGP_DCR_TR_RCTRL	(_BGP_DCR_TR_CH0_RCTRL - _BGP_DCR_TR_CH0)
+#define _BGP_DCR_TR_SCTRL	(_BGP_DCR_TR_CH0_SCTRL - _BGP_DCR_TR_CH0)
+#define _BGP_DCR_TR_RSTAT	(_BGP_DCR_TR_CH0_RSTAT - _BGP_DCR_TR_CH0)
+
+/* hardware specification: 4 bytes address, 256 bytes payload */
+#define COL_ALEN	4
+#define COL_PAYLOAD	256
+
+#define FRAGMENT_LISTS	256	/* reassembly hash buckets */
+
+
+/* Character-device side of a collective channel (mmap/ioctl access). */
+struct bgpnet_dev
+{
+    int major,minor;             /* device major, minor */
+    unsigned long long physaddr; /* physical address */
+    /* NOTE(review): a member named 'current' can collide with the
+     * kernel's 'current' (get_current) macro on some arches -- verify. */
+    struct task_struct* current; /* process holding device */
+    int signum;                  /* signal to send holding process */
+    wait_queue_head_t read_wq;   /* sleepers waiting for read_complete */
+    int read_complete;
+    void *regs;                  /* mapped regs (only used with col) */
+    struct semaphore sem;        /* interruptible semaphore */
+    struct cdev cdev;            /* container device? */
+};
+
+
+/* Per-channel state: register mappings, IRQ-pending masks, the injection
+ * retry timer and per-channel traffic/error counters. */
+struct bgcol_channel {
+	phys_addr_t paddr;		/* physical base of the channel window */
+	unsigned long mioaddr;		/* ioremapped virtual base */
+	unsigned int dcrbase;		/* this channel's DCR base number */
+	unsigned long irq_rcv_pending_mask;
+	unsigned long irq_inj_pending_mask;
+	struct timer_list inj_timer;	/* retries injection when FIFO was full */
+	/* counters, for diagnostics only */
+	unsigned int injected;
+	unsigned int partial_injections;
+	unsigned int unaligned_hdr_injections;
+	unsigned int unaligned_data_injections;
+	unsigned int received;
+	unsigned int inject_fail;
+	unsigned int dropped;
+	unsigned int delivered;
+	unsigned int idx;		/* channel index (0 or 1) */
+	struct bg_col* col;		/* back-pointer to owning device */
+	struct bgpnet_dev* chrdev;	/* character-device view, if any */
+};
+
+enum {
+	k_ethkey_table_size=256	/* per-source reassembly table size */
+};
+
+/* Per-source-key reassembly cursor: where the next fragment's payload
+ * goes and which fragment index is expected next. */
+struct bg_col_per_eth {
+	unsigned char * payload ;
+	unsigned int expect ;
+};
+
+/* Driver-wide state for the collective device (one instance: static_col). */
+struct bg_col {
+	spinlock_t lock;
+	spinlock_t irq_lock;
+	struct bgcol_channel chn[BGP_MAX_CHANNEL];
+	unsigned int dcrbase;		/* device DCR base (COL_DCR_BASE) */
+	unsigned int curr_conn;		/* current connection id for fragments */
+	unsigned int nodeid;		/* our collective-network address */
+	unsigned int inj_wm_mask;	/* injection watermark IRQ enable bits */
+	unsigned int bgnet_channel ;	/* channel used by the net interface */
+
+	unsigned int max_packets_per_frame ;
+	unsigned int mtu ;
+
+	/* statistics */
+	unsigned fragment_timeout;
+
+	/* Interrupt management */
+	unsigned int handler_running ;
+	/* Transmission items */
+	struct bglink_hdr_col lnkhdr_xmit __attribute__((aligned(8))); /* Link header being used for partially-sent skb */
+	spinlock_t irq_lock_xmit ;
+	struct sk_buff_head skb_list_xmit ; /* List of skb's to be sent */
+	struct sk_buff_head skb_list_free ; /* Keep a list of skb's to free at user level */
+	struct sk_buff * skb_current_xmit ; /* Partially-sent skb, if any */
+	void * current_xmit_data ; /* Data from current skb adjusted for alignment */
+	int current_xmit_len ; /* Length of current skb data */
+	union bgcol_header dest_xmit ;	/* hardware header for current skb */
+	unsigned int fragidx_xmit ;	/* next fragment index to inject */
+
+	/* Reception items */
+	struct bglink_hdr_col lnkhdr_rcv __attribute__((aligned(8))); /* Link header pulled out of reception FIFO */
+	struct sk_buff_head fragskb_list_rcv ; /* List of fully-received frames */
+	struct sk_buff_head fragskb_list_discard ; /* List of frames to discard */
+	struct sk_buff * skb_in_waiting ; /* An skb ready to catch the start of a 'new' frame */
+	struct sk_buff * skb_mini ; /* A 'miniature' skbuff just right for catching single-packet frames */
+
+	/* Core-to-core items */
+	struct sk_buff_head skb_list_for_filling ;
+	struct sk_buff_head skb_list_for_delivering ;
+	struct sk_buff_head skb_list_for_freeing ;
+
+	unsigned int deliver_without_workqueue ; /* Whether to activate the 'deliver on other core' code for an skbuff */
+
+
+	struct bgnet_dev *bgnet ;	/* the network interface using us */
+
+	/* Statistics */
+
+	int recv_total ;		/* frames received */
+	int recv_guess_miss ;		/* data-placement guesses that missed */
+	int recv_no_skbuff ;		/* drops: no skb available */
+	int recv_no_first_packet ;	/* drops: fragment before head seen */
+
+	/* 'big' tables */
+	struct bg_col_per_eth per_eth_table[k_ethkey_table_size] ;
+	struct sk_buff * skb_rcv_table[k_ethkey_table_size] ;
+
+	/* Tuning statistics */
+#if defined(KEEP_BG_COL_STATISTICS)
+	/* NOTE(review): sized 16 here, but the sysctl entries expose only
+	 * COL_FIFO_SIZE (8) ints of each -- confirm intended size. */
+	unsigned int send_fifo_histogram0[16] ;
+	unsigned int send_fifo_histogram1[16] ;
+	unsigned int recv_fifo_histogram0[16] ;
+	unsigned int recv_fifo_histogram1[16] ;
+#if defined(EXTRA_TUNING)
+	unsigned int send_fifo_histogram2[16] ;
+	unsigned int recv_fifo_histogram2[16] ;
+#endif
+#endif
+	unsigned int spurious_interrupts ;
+	/* Diagnostic controls */
+	struct ctl_table_header * sysctl_table_header ;	/* for unregister */
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+	struct timer_list missed_interrupt_timer ;
+#endif
+};
+
+/**********************************************************************
+ * driver
+ **********************************************************************/
+
+#define COL_DEV_NAME "bgcol"	/* resource / device name */
+
+extern int bgcol_debug_tracemask ;	/* runtime trace enable bits */
+struct bg_col;
+
+/* Public driver entry points, implemented in bgcol.c. */
+struct bg_col *bgcol_get_dev(void);		/* the single device instance */
+void bgcol_enable_interrupts(struct bg_col* col);
+unsigned int bgcol_get_nodeid(struct bg_col* col);
+void bgcol_link_hdr_init(struct bglink_hdr_col *lnkhdr);
+/* bgcol_xmit takes the transmit lock; __bgcol_xmit assumes the caller
+ * already holds it. */
+int bgcol_xmit(struct bg_col *col, int chnidx, union bgcol_header dest,
+	       struct bglink_hdr_col *lnkhdr, void *data, int len);
+int __bgcol_xmit(struct bg_col *col, int chnidx, union bgcol_header dest,
+		 struct bglink_hdr_col *lnkhdr, void *data, int len);
+
+void bgcol_set_mtu(struct bg_col* col, unsigned int mtu) ;
+/* per-channel watermark interrupt control */
+void bgcol_enable_inj_wm_interrupt(struct bgcol_channel* chn);
+void bgcol_disable_inj_wm_interrupt(struct bgcol_channel* chn);
+void bgcol_enable_rcv_wm_interrupt(struct bgcol_channel* chn);
+void bgcol_disable_rcv_wm_interrupt(struct bgcol_channel* chn);
+
+void bgcol_duplex_slih(unsigned long dummy) ;	/* second-level IRQ handler */
+
+int col_start_xmit(struct sk_buff *skb, struct net_device *dev);
+int __init bgcol_module_init(void) ;
+/* compile-time constant switch (was a tunable; see commented sysctl) */
+enum {
+	bgcol_diagnostic_use_napi = 1
+};
+/* extern int bgcol_diagnostic_use_napi ; */
+
+#endif
diff --git a/drivers/net/bgp_collective/bglink.h b/drivers/net/bgp_collective/bglink.h
new file mode 100644
index 00000000000000..37feca2a68a2a7
--- /dev/null
+++ b/drivers/net/bgp_collective/bglink.h
@@ -0,0 +1,158 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * Description: Link layer definitions
+ *
+ *
+ ********************************************************************/
+#ifndef __DRIVERS__BLUEGENE__LINK_H__
+#define __DRIVERS__BLUEGENE__LINK_H__
+
+#include <linux/skbuff.h>
+
+#include <asm/atomic.h>
+
+/* link layer protocol IDs */
+#define BGLINK_P_NET	0x01	/* ethernet-over-collective */
+#define BGLINK_P_CON	0x10	/* console */
+
+/* Per-protocol option bits carried in the link header; interpretation
+ * depends on the protocol ID. */
+union link_proto_opt {
+	u16 raw;
+	struct {
+		u16 option   : 4;
+		u16 pad_head : 4;	/* alignment padding before payload */
+		u16 pad_tail : 8;	/* alignment padding after payload */
+	} opt_net;
+	struct {
+		u16 len;		/* console payload length */
+	} opt_con;
+} __attribute__((packed));
+
+/* Software link header carried with every collective fragment:
+ * source/destination keys, connection id and fragment sequencing. */
+struct bglink_hdr_col {
+	u32 dst_key;
+	u32 src_key;
+	u16 conn_id;		/* distinguishes interleaved frames */
+	u8 this_pkt;		/* fragment index within the frame */
+	u8 total_pkt;		/* fragment count of the frame */
+	u16 lnk_proto;    /* net, con, ... */
+	union link_proto_opt opt;
+} ; /* __attribute__((packed)); */
+
+/* Same 16 bytes as bglink_hdr_col viewed as four 32-bit words, for
+ * whole-word FIFO transfers. */
+struct bglink_hdr_col_map {
+	u32 dst_key;
+	u32 src_key;
+	u32 conn_this_total;
+	u32 proto_option_head_tail ;
+} ;
+
+/* Link header used on the torus network (no fragment sequencing here). */
+struct bglink_hdr_torus {
+	u32 dst_key;
+	u32 len;
+	u16 lnk_proto;    /* net, con, ... */
+	union link_proto_opt opt;
+} ; /* __attribute__((packed)); */
+
+/* link protocol callbacks
+ * rcv is called when new packet arrives
+ * flush is called when the device was busy and becomes idle
+ * again (flow control)
+ */
+struct bgnet_dev ;
+struct bg_col ;
+struct bglink_proto {
+ u16 lnk_proto;
+ int receive_from_self;
+ int (*col_rcv)(struct bg_col*, struct sk_buff*, struct bglink_hdr_col *, struct bglink_proto *proto);
+ int (*col_rcv_trimmed)(struct bg_col*, struct sk_buff*, struct bglink_proto *proto, unsigned int src_key);
+ int (*col_flush)(int chn);
+ int (*torus_rcv)(struct sk_buff*, struct bglink_hdr_torus *);
+ void *private;
+ struct list_head list;
+};
+
+extern struct list_head linkproto_list;
+
+static void bglink_register_proto(struct bglink_proto *proto) __attribute__ ((unused)) ;
+static void bglink_unregister_proto(struct bglink_proto *proto) __attribute__ ((unused)) ;;
+static struct bglink_proto* bglink_find_proto(u16 proto)__attribute__ ((unused)) ;
+
+enum {
+ k_link_protocol_limit = 8 /* we only actually have 'eth' and 'eth_reflector' at the moment, but we might get 'con' and more */
+};
+extern struct bglink_proto * proto_array[k_link_protocol_limit] ;
+/* Install a protocol handler in the dispatch table, indexed by its
+ * protocol ID.
+ * NOTE(review): IDs >= k_link_protocol_limit (8) are silently ignored;
+ * BGLINK_P_CON is 0x10, so a console protocol could never register --
+ * confirm this is acceptable while only 'eth' protocols exist. */
+static void bglink_register_proto(struct bglink_proto *proto)
+{
+	if( proto->lnk_proto < k_link_protocol_limit)
+	{
+		proto_array[proto->lnk_proto] = proto ;
+	}
+}
+
+/* Remove a previously registered protocol handler (no-op for IDs that
+ * were out of range at registration time). */
+static void bglink_unregister_proto(struct bglink_proto *proto)
+{
+	if( proto->lnk_proto < k_link_protocol_limit)
+	{
+		proto_array[proto->lnk_proto] = NULL ;
+	}
+}
+
+static struct bglink_proto* bglink_find_proto(u16 proto)
+{
+ return proto_array[proto & (k_link_protocol_limit-1)] ;
+}
+
+
+#if 0
+/*
+ * Here are some thoughts on how we might better consolidate link headers
+ * for the col and torus. The idea is that there's an 8-byte packet header
+ * that must be sent (at least) once per packet, and an 8-byte fragment header
+ * that has to be included with every fragment. For the col we can include
+ * both headers in every fragment. For the torus, there's not room to send
+ * the packet header in every fragment, so we'd have to send it once as part
+ * of the payload in the first fragment (as we're doing now anyway).
+ * The various structures might look something like:
+ */
+
+struct pkt_hdr {
+ u32 lnk_proto : 8;
+ u32 dst_key : 24;
+ u16 len;
+ u16 private;
+} __attribute__((packed));
+
+struct frag_hdr {
+ u32 offset;
+ u32 conn_id : 8;
+ u32 src_key : 24;
+} __attribute__((packed));
+
+struct frag_hdr_col {
+ struct pkt_hdr pkt;
+ struct frag_hdr frag;
+} __attribute__((packed));
+
+struct frag_hdr_torus {
+ union torus_fifo_hw_header fifo;
+ struct frag_hdr frag;
+} __attribute__((packed));
+#endif
+
+#endif /* !__DRIVERS__BLUEGENE__LINK_H__ */
diff --git a/drivers/net/bgp_collective/bgnet.c b/drivers/net/bgp_collective/bgnet.c
new file mode 100644
index 00000000000000..2dc45ac5cef0d7
--- /dev/null
+++ b/drivers/net/bgp_collective/bgnet.c
@@ -0,0 +1,827 @@
+/*********************************************************************
+ *
+ * Description: Blue Gene driver exposing col and torus as a NIC
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors:
+ * Chris Ward <tjcw@uk.ibm.com>
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ * Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
+#include <linux/workqueue.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/bgp_personality.h>
+#include <asm/delay.h>
+
+#include <asm/bluegene.h>
+
+#include "bglink.h"
+#include "bgnet.h"
+#include "bgcol.h"
+/* #include "bgtor.h" */
+
+
+/**********************************************************************
+ * defines
+ **********************************************************************/
+
+#define DRV_NAME "bgnet"
+#define DRV_VERSION "0.5"
+#define DRV_DESC "Blue Gene NIC (IBM)"
+
+MODULE_DESCRIPTION(DRV_DESC);
+MODULE_AUTHOR("IBM");
+
+/* #define TRUST_TREE_CRC */
+
+#include <linux/KernelFxLog.h>
+
+#include "../bgp_network/bgp_net_traceflags.h"
+
+
+#define XTRACEN(i,x...)
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...) { printk(KERN_EMERG x) ; }
+#define TRACE1(x...) { printk(KERN_EMERG x) ; }
+#define TRACE2(x...) { printk(KERN_EMERG x) ; }
+#define TRACEN(i,x...) { printk(KERN_EMERG x) ; }
+#define TRACED(x...) { printk(KERN_EMERG x) ; }
+#elif defined(CONFIG_BLUEGENE_COLLECTIVE_TRACE)
+#define TRACE(x...) KernelFxLog(bgcol_debug_tracemask & k_t_general,x)
+#define TRACE1(x...) KernelFxLog(bgcol_debug_tracemask & k_t_lowvol,x)
+#define TRACE2(x...) KernelFxLog(bgcol_debug_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgcol_debug_tracemask & (i),x)
+#define TRACED(x...) KernelFxLog(1,x)
+#define TRACES(x...) KernelFxLog(1,x)
+#else
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+/* An IPv4 address for slotting into a trace message */
+#define NIPQ(X) ((X)>>24)&0xff,((X)>>16)&0xff,((X)>>8)&0xff,(X)&0xff
+
+#define BGNET_FRAG_MTU 240
+#define BGNET_MAX_MTU (BGNET_FRAG_MTU * 254)
+#define BGNET_DEFAULT_MTU ETH_DATA_LEN
+
+
+static BGP_Personality_t bgnet_personality;
+/* static struct net_device *static_dev ; */
+
+/* static struct bglink_proto bgnet_lnk; */
+
+static DEFINE_SPINLOCK(bgnet_lock);
+static LIST_HEAD(bgnet_list);
+
+struct skb_cb_lnk {
+ struct bglink_hdr_col lnkhdr;
+ union bgcol_header dest;
+};
+
+int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev, unsigned int x, unsigned int y, unsigned int z) ;
+
+/**********************************************************************
+ * Linux module
+ **********************************************************************/
+
+MODULE_DESCRIPTION("BlueGene Ethernet driver");
+MODULE_LICENSE("GPL");
+
+int bgnic_driverparm = 0 ;
+
+/* Trace up to the first 256 bytes at 'address' as rows of eight 32-bit
+ * words, under the k_t_fifocontents trace mask.  Debug aid only.
+ * NOTE(review): the loop emits whole 32-byte rows, so when 'length' is not
+ * a multiple of 32 the final row reads up to 31 bytes past 'length' --
+ * confirm callers always pass buffers with that much slack.
+ * Also assumes 'address' is 4-byte aligned for the int loads. */
+static void dumpmem(const void *address, unsigned int length, const char * label)
+ {
+ int x ;
+ TRACEN(k_t_fifocontents,"Memory dump, length=%d: %s",length,label) ;
+ if( length > 256 ) {
+  length = 256 ;
+ }
+ for (x=0;x<length;x+=32)
+  {
+  int *v = (int *)(address+x) ;
+  TRACEN(k_t_fifocontents,"%p: %08x %08x %08x %08x %08x %08x %08x %08x",
+    v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
+    ) ;
+  }
+ }
+
+
+/**********************************************************************
+ * Linux' packet and skb management
+ **********************************************************************/
+
+
+/* ndo_open: bring the interface up.  Only starts the TX queue; RX is
+ * driven by the collective device's interrupt path, so there is nothing
+ * to enable here (the commented-out watermark-interrupt calls show the
+ * earlier design).  Always returns 0. */
+static int bgnet_open(struct net_device* dev)
+{
+/* struct bgnet_dev* bgnet = (struct bgnet_dev*) netdev_priv(dev); */
+
+/* bgcol_enable_rcv_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */
+
+ TRACEN(k_t_napi,"netif_start_queue(dev=%p)",dev) ;
+ netif_start_queue(dev);
+
+ return 0;
+}
+
+/* ndo_stop: take the interface down by stopping the TX queue.  The
+ * collective interrupts are deliberately left enabled (see commented-out
+ * disables).  Always returns 0. */
+static int bgnet_stop(struct net_device* dev)
+{
+/* struct bgnet_dev* bgnet = (struct bgnet_dev*) netdev_priv(dev); */
+
+ TRACEN(k_t_napi,"netif_stop_queue(dev=%p)",dev) ;
+ netif_stop_queue(dev);
+/* bgcol_disable_rcv_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */
+/* bgcol_disable_inj_wm_interrupt(&bgnet->col->chn[bgnet->col_channel]); */
+
+ return 0;
+}
+
+
+/* ndo_change_mtu: accept MTUs in [60, BGNET_MAX_MTU] and propagate the
+ * new frame size (payload + Ethernet header) down to the collective
+ * layer so it can size its fragmentation accordingly.
+ * Returns 0 on success, -EINVAL for an out-of-range request. */
+static int bgnet_change_mtu(struct net_device *dev, int new_mtu)
+{
+ struct bgnet_dev *bgnet = netdev_priv(dev);
+ if (new_mtu < 60 || new_mtu > BGNET_MAX_MTU )
+  return -EINVAL;
+ dev->mtu = new_mtu;
+ bgcol_set_mtu(bgnet->bgcol, new_mtu+sizeof(struct ethhdr)) ;
+ return 0;
+}
+
+
+/* For a frame marked CHECKSUM_PARTIAL, record where the L4 checksum lives
+ * (csum_start/csum_offset) so a later hop can complete it if the frame
+ * leaves the Blue Gene fabric.
+ * NOTE(review): eth->h_proto is compared against host-order ETH_P_IP with
+ * no ntohs(); that is correct on this big-endian PowerPC target but is not
+ * portable -- confirm intended.  Also assumes the IP header immediately
+ * follows the Ethernet header (no VLAN tag), and leaves csum_offset
+ * untouched for protocols other than TCP/UDP. */
+static inline void stamp_checksum_place_in_skb(struct sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ unsigned int eth_proto = eth->h_proto ;
+ struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+ /* unsigned int iphlen = 4*iph->ihl ; */
+ /* struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) ); */
+ /* struct udphdr *udph = (struct udphdr *) ( ((char *)(iph)) + (iphlen) ); */
+ unsigned int ip_proto = iph->protocol ;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+
+ if( eth_proto == ETH_P_IP) {
+  if( ip_proto == IPPROTO_TCP) skb->csum_offset = offsetof(struct tcphdr, check);
+  else if( ip_proto == IPPROTO_UDP) skb->csum_offset = offsetof(struct udphdr, check);
+ }
+
+}
+
+/*
+ * The hardware data rate on 'collective' is 6 bits/cycle, i.e. 5100Mb/s .
+ * We carry 240 bytes of payload in each 256 byte packet, and there are some bytes of 'overhead' as well
+ * (CRC, opcode, and a few others); giving a 'peak performance' TCP/IP data rate of a little under 4781 Mb/s .
+ * The 'collective' hardware should be able to do this in both directions simultaneously.
+ *
+ * Driving data into the compute fabric from the 10gE link can achieve more or less this, by using one core as
+ * interrupt handler for the 10gE and another core as interrupt handler for the collective, if you run (say)
+ * 16 TCP/IP sessions through the 10gE and the IO node, one to each compute node in the PSET.
+ *
+ * Driving data out of the compute fabric and into the 10gE in the normal way for linux device drivers causes
+ * the core handling the collective interrupt to go 100% busy; there are not enough cycles to drain the collective
+ * FIFO and also go through the linux networking stack. I have seen about 4Gb/s this way.
+ * To get the last 15% or so, it seems necessary to have more than one core helping with this work.
+ *
+ * I'm trying to do this by having one core handle the 'collective' interrupt and drain the FIFO, and then
+ * hand the sk_buff off to another core via a 'work queue', so that this second core can drive the linux
+ * network stack.
+ *
+ * I haven't measured the simultaneous-bidirectional data rate capability.
+ *
+ */
+/* Receive callback registered via bglink_proto.col_rcv: strip the link
+ * header padding from a frame arriving off the collective network, drop
+ * self-sent frames (broadcast echoes), stamp checksum metadata, and hand
+ * the skb to the network stack (netif_receive_skb under NAPI, else
+ * netif_rx).  Always returns 0.
+ * NOTE(review): '-1 != (int) proto' treats (void*)-1 as a sentinel handler
+ * pointer -- confirm the caller really passes -1 for "no handler". */
+static int bgnet_receive(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_hdr_col *lnkhdr, struct bglink_proto* proto)
+{
+ TRACE("(>) skb=%p lnkhdr=%p proto=%p", skb,lnkhdr,proto) ;
+ if( skb != NULL && lnkhdr != NULL && proto != NULL && -1 != (int) proto )
+ {
+  struct net_device *dev = (struct net_device*)proto->private;
+  struct bgnet_dev *bgnet = netdev_priv(dev);
+/* struct net_device *dev = (struct net_device*)((void *)bgnet - */
+/* netdev_priv(NULL)); */
+
+ TRACE("bgnet rcvd pkt: data=%p, len=%d, head=%d, tail=%d, res len=%d [%s:%d]",
+  skb->data, skb->len, lnkhdr->opt.opt_net.pad_head,
+  lnkhdr->opt.opt_net.pad_tail, skb->len - lnkhdr->opt.opt_net.pad_head - lnkhdr->opt.opt_net.pad_tail, __func__, __LINE__);
+
+/* if (skb->len % BGNET_FRAG_MTU != 0) */
+/* printk("bgnet: received packet size not multiple of %d\n", BGNET_FRAG_MTU); */
+
+ /* skb_pull and trim check for over/underruns. For 0 size the
+  * add/subtract is the same as a test */
+ __skb_pull(skb, lnkhdr->opt.opt_net.pad_head);
+ __skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail);
+
+ /* Frames we broadcast come back to us too; recognise our own src_key
+  * and discard. */
+ if (lnkhdr->src_key == bgnet->bgcol_vector) {
+  /* drop ether packets that are from ourselves */
+  /* bg tree device sends packets to itself when broadcasting */
+  kfree_skb(skb);
+  return 0;
+ }
+
+ /* dump_skb(skb); */
+
+ dumpmem(skb->data,skb->len,"Frame delivered via collective") ;
+
+ skb->dev = dev;
+ skb->protocol = eth_type_trans(skb, dev);
+
+ /* The collective hardware CRC already covered the frame; mark the
+  * checksum as partial so forwarding off-fabric can complete it. */
+ if ( k_trust_collective_crc) skb->ip_summed = CHECKSUM_PARTIAL ;
+ stamp_checksum_place_in_skb(skb) ;
+
+/* #if defined(TRUST_TREE_CRC) */
+/* skb->ip_summed = CHECKSUM_PARTIAL ; // Frame was checked by CRC, but we would need a checksum if it is being forwarded off the BGP fabric */
+/* // // Packets from tree-local addresses have been verified by tree hardware */
+/* // { */
+/* // struct ethhdr *eth = (struct ethhdr *)skb->data; */
+/* // if (bgnet->eth_mask == 0 || */
+/* // ((bgnet->eth_mask & *(unsigned int *)(&eth->h_source[0])) == */
+/* // (bgnet->eth_local))) */
+/* // { */
+/* // skb->ip_summed = CHECKSUM_UNNECESSARY ; */
+/* // } */
+/* // else */
+/* // { */
+/* // skb->ip_summed = CHECKSUM_NONE ; */
+/* // } */
+/* // } */
+/* #endif */
+
+ TRACE("Delivering skb->dev=%p skb->protocol=%d skb->pkt_type=%d skb->ip_summed=%d ",
+  skb->dev, skb->protocol, skb->pkt_type, skb->ip_summed ) ;
+ dumpmem(skb->data,skb->len,"Frame after stripping header") ;
+ dev->last_rx = jiffies;
+ bgnet->stats.rx_packets++;
+ bgnet->stats.rx_bytes += skb->len;
+
+ TRACE("bgnet_receive before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.tx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.tx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+  bgnet->stats.rx_packets, bgnet->stats.tx_packets, bgnet->stats.rx_bytes, bgnet->stats.tx_bytes, bgnet->stats.rx_frame_errors) ;
+/* TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; // Only tracing the torus ... */
+/* if( k_deliver_via_workqueue && bgnet->bgcol->deliver_via_workqueue ) */
+/* { */
+/* bgnet_deliver_via_workqueue(skb) ; */
+/* } */
+/* else */
+/* { */
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+ if( bgcol_diagnostic_use_napi)
+ {
+  {
+  TRACEN(k_t_napi|k_t_request,"netif_receive_skb(%p)",skb) ;
+  netif_receive_skb(skb) ;
+  }
+ }
+ else
+ {
+  netif_rx(skb);
+ }
+#else
+ netif_rx(skb);
+#endif
+/* } */
+ TRACE("bgnet_receive after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+  bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+
+ }
+ TRACE("(<)") ;
+
+ return 0;
+}
+
+/* Receive callback (col_rcv_trimmed variant): the caller has already
+ * stripped the link-header padding, and passes the sender's src_key
+ * separately.  Drops self-sent frames, stamps checksum metadata, and
+ * hands the skb to the stack (netif_receive_skb under NAPI, else
+ * netif_rx).  Always returns 0. */
+static int bgnet_receive_trimmed(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_proto* proto, unsigned int src_key )
+{
+ TRACE("(>) skb=%p proto=%p", skb,proto) ;
+ /* Bug fix: the guard previously ended in a bare '&& -1', which is a
+  * constant-true no-op; restore the (void*)-1 sentinel-handler check
+  * used by the sibling callbacks (bgnet_receive, col_reflect). */
+ if( skb != NULL && proto != NULL && -1 != (int) proto )
+ {
+  struct net_device *dev = (struct net_device*)proto->private;
+  struct bgnet_dev *bgnet = netdev_priv(dev);
+/* struct net_device *dev = (struct net_device*)((void *)bgnet - */
+/* netdev_priv(NULL)); */
+
+ TRACE("bgnet rcvd pkt: data=%p, len=%d",
+  skb->data, skb->len);
+ /* Frames we broadcast come back to us too; recognise our own
+  * src_key and discard below. */
+ if( src_key != bgnet->bgcol_vector)
+ {
+  dumpmem(skb->data,skb->len,"Frame delivered via collective") ;
+
+  skb->dev = dev;
+  skb->protocol = eth_type_trans(skb, dev);
+
+  /* Collective hardware CRC covered the frame; mark partial so a
+   * later hop can complete the L4 checksum if forwarded off-fabric. */
+  if ( k_trust_collective_crc) skb->ip_summed = CHECKSUM_PARTIAL ;
+  stamp_checksum_place_in_skb(skb) ;
+
+
+  TRACE("Delivering skb->dev=%p skb->protocol=%d skb->pkt_type=%d skb->ip_summed=%d ",
+   skb->dev, skb->protocol, skb->pkt_type, skb->ip_summed ) ;
+  dumpmem(skb->data,skb->len,"Frame after stripping header") ;
+  dev->last_rx = jiffies;
+  bgnet->stats.rx_packets++;
+  bgnet->stats.rx_bytes += skb->len;
+
+  TRACE("bgnet_receive before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.tx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.tx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+   bgnet->stats.rx_packets, bgnet->stats.tx_packets, bgnet->stats.rx_bytes, bgnet->stats.tx_bytes, bgnet->stats.rx_frame_errors) ;
+  /* TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ; // Only tracing the torus ... */
+  #if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+  if( bgcol_diagnostic_use_napi)
+  {
+   {
+   TRACEN(k_t_napi|k_t_request,"netif_receive_skb(%p)",skb) ;
+   netif_receive_skb(skb) ;
+   }
+  }
+  else
+  {
+   netif_rx(skb);
+  }
+  #else
+  netif_rx(skb);
+  #endif
+  TRACE("bgnet_receive after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+   bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+ }
+ else
+ {
+  /* a discardable self-send */
+  dev_kfree_skb(skb) ;
+ }
+
+ }
+ TRACE("(<)") ;
+
+ return 0;
+}
+
+
+/* A packet gets to the IO node, and needs 'reflecting' to the compute node(s) that want it. */
+static int col_reflect(struct bg_col *bgcol, struct sk_buff *skb, struct bglink_hdr_col *lnkhdr,
+ struct bglink_proto* proto)
+{
+ TRACE("(>) col_reflect skb=%p lnkhdr=%p proto=%p", skb,lnkhdr,proto) ;
+ if( skb != NULL && lnkhdr != NULL && proto != NULL && -1 != (int) proto )
+ {
+ struct net_device *dev = (struct net_device*)proto->private;
+ struct bgnet_dev *bgnet = netdev_priv(dev);
+
+
+ TRACE("bgnet rcvd pkt for reflection: data=%p, len=%d, head=%d, tail=%d, res len=%d [%s:%d]",
+ skb->data, skb->len, lnkhdr->opt.opt_net.pad_head,
+ lnkhdr->opt.opt_net.pad_tail, skb->len - lnkhdr->opt.opt_net.pad_head - lnkhdr->opt.opt_net.pad_tail, __func__, __LINE__);
+
+/* if (skb->len % BGNET_FRAG_MTU != 0) */
+/* printk("bgnet: received packet size not multiple of %d\n", BGNET_FRAG_MTU); */
+
+ /* skb_pull and trim check for over/underruns. For 0 size the
+ * add/subtract is the same as a test */
+ __skb_pull(skb, lnkhdr->opt.opt_net.pad_head);
+ __skb_trim(skb, skb->len - lnkhdr->opt.opt_net.pad_tail);
+ /* A 'broadcast' packet needs delivering locally as well as reflecting */
+ {
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ if (is_broadcast_ether_addr(eth->h_dest)) {
+ struct sk_buff *localskb = skb_clone(skb, GFP_KERNEL);
+ if( localskb )
+ {
+ dumpmem(localskb->data,localskb->len,"Frame delivered via tree (broadcast reflection)") ;
+ localskb->dev = dev;
+ localskb->protocol = eth_type_trans(localskb, dev);
+
+ localskb->ip_summed = CHECKSUM_UNNECESSARY ; /* Packet was from tree, h/w verified it */
+
+ TRACE("Delivering localskb->dev=%p localskb->protocol=%d localskb->pkt_type=%d localskb->ip_summed=%d ",
+ localskb->dev, localskb->protocol, localskb->pkt_type, localskb->ip_summed ) ;
+ dumpmem(localskb->data,localskb->len,"Frame after stripping header") ;
+ dev->last_rx = jiffies;
+ bgnet->stats.rx_packets++;
+ bgnet->stats.rx_bytes += localskb->len;
+ TRACE("col_reflect before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+ bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+/* TRACEN(k_t_napi,"netif_rx(skb=%p)",localskb) ; // Only tracing the torus ... */
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+ if( bgcol_diagnostic_use_napi)
+ {
+ TRACEN(k_t_napi,"netif_receive_skb(%p)",localskb) ;
+ netif_receive_skb(localskb) ;
+ }
+ else
+ {
+ netif_rx(localskb);
+ }
+#else
+ netif_rx(localskb) ;
+#endif
+ TRACE("col_reflect after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+ bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+ }
+ }
+ }
+
+
+ /* dump_skb(skb); */
+ col_start_xmit(skb, dev) ;
+ }
+
+ TRACE("(<) col_reflect") ;
+
+ return 0;
+}
+
+/* A packet gets to the IO node, and needs 'reflecting' to the compute node(s) that want it. */
+/* Trimmed-frame variant of col_reflect(): the caller has already removed
+ * the link-header padding.  Broadcasts are cloned for local delivery,
+ * then every frame is re-transmitted via col_start_xmit().  Always
+ * returns 0.  Same GFP_KERNEL-context caveat as col_reflect().
+ * NOTE(review): src_key is accepted but never compared against our own
+ * vector here -- confirm reflected self-sends are filtered elsewhere. */
+static int col_reflect_trimmed(struct bg_col *bgcol, struct sk_buff *skb,
+   struct bglink_proto* proto, unsigned int src_key )
+{
+ TRACE("(>) col_reflect skb=%p proto=%p", skb,proto) ;
+ if( skb != NULL && proto != NULL && -1 != (int) proto )
+ {
+  struct net_device *dev = (struct net_device*)proto->private;
+  struct bgnet_dev *bgnet = netdev_priv(dev);
+
+
+ TRACE("bgnet rcvd pkt for reflection: data=%p, len=%d",
+  skb->data, skb->len);
+
+
+ /* A 'broadcast' packet needs delivering locally as well as reflecting */
+ {
+  struct ethhdr *eth = (struct ethhdr *)skb->data;
+  if (is_broadcast_ether_addr(eth->h_dest)) {
+   struct sk_buff *localskb = skb_clone(skb, GFP_KERNEL);
+   /* Clone failure tolerated: skip local delivery, still reflect. */
+   if( localskb )
+   {
+    dumpmem(localskb->data,localskb->len,"Frame delivered via tree (broadcast reflection)") ;
+    localskb->dev = dev;
+    localskb->protocol = eth_type_trans(localskb, dev);
+
+    localskb->ip_summed = CHECKSUM_UNNECESSARY ; /* Packet was from tree, h/w verified it */
+
+    TRACE("Delivering localskb->dev=%p localskb->protocol=%d localskb->pkt_type=%d localskb->ip_summed=%d ",
+     localskb->dev, localskb->protocol, localskb->pkt_type, localskb->ip_summed ) ;
+    dumpmem(localskb->data,localskb->len,"Frame after stripping header") ;
+    dev->last_rx = jiffies;
+    bgnet->stats.rx_packets++;
+    bgnet->stats.rx_bytes += localskb->len;
+    TRACE("col_reflect before-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+     bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+/* TRACEN(k_t_napi,"netif_rx(skb=%p)",localskb) ; // Only tracing the torus ... */
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+    if( bgcol_diagnostic_use_napi)
+    {
+     TRACEN(k_t_napi,"netif_receive_skb(%p)",localskb) ;
+     netif_receive_skb(localskb) ;
+    }
+    else
+    {
+     netif_rx(localskb);
+    }
+#else
+    netif_rx(localskb) ;
+#endif
+    TRACE("col_reflect after-netif-rx bgnet->stats.rx_packets=%lu bgnet->stats.rx_bytes=%lu bgnet->stats.rx_frame_errors=%lu",
+     bgnet->stats.rx_packets, bgnet->stats.rx_bytes, bgnet->stats.rx_frame_errors) ;
+   }
+  }
+ }
+
+
+ /* dump_skb(skb); */
+ /* Reflect: send the frame back out to the compute nodes. */
+ col_start_xmit(skb, dev) ;
+ }
+
+ TRACE("(<) col_reflect") ;
+
+ return 0;
+}
+
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/* netpoll hook: nothing to do, RX is pushed by the collective device. */
+static void bgnet_poll(struct net_device *dev)
+{
+ /* no-op; packets are fed by the col device */
+}
+#endif
+
+/* True when the low three bits of the first MAC octet are 0b110, the
+ * pattern this driver uses to tag torus-routable station addresses. */
+static inline int is_torus_ether_addr(const u8 *addr)
+{
+ return ((addr[0] & 0x7) == 0x6);
+}
+
+
+unsigned int find_xyz_address(unsigned int ip) ;
+
+
+/* ndo_start_xmit: hand every frame to the collective layer.
+ * NOTE(review): the return value of col_start_xmit() is discarded and 0
+ * (NETDEV_TX_OK) is always reported -- confirm col_start_xmit queues
+ * internally and cannot ask for a requeue. */
+static int bgnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ col_start_xmit(skb, dev) ;
+ return 0 ;
+}
+
+/* ndo_uninit: drop both link-protocol registrations made in bgnet_init(). */
+static void bgnet_uninit(struct net_device *dev)
+{
+ struct bgnet_dev *bgnet = netdev_priv(dev);
+
+ bglink_unregister_proto(&bgnet->lnk);
+ bglink_unregister_proto(&bgnet->lnkreflect);
+
+}
+
+/* ndo_get_stats: expose the driver-private counter block. */
+static struct net_device_stats *bgnet_get_stats(struct net_device *dev)
+{
+ struct bgnet_dev* bgnet = netdev_priv(dev);
+
+ return &bgnet->stats;
+}
+
+
+/* ndo_set_mac_address: validate and install a new station address.
+ * Returns -EADDRNOTAVAIL for multicast/zero addresses.
+ * NOTE(review): bgnet->bgcol_vector and eth_local are derived from
+ * dev_addr in bgnet_init() but are not recomputed here -- confirm
+ * changing the MAC at runtime is meant to leave them stale. */
+static int bgnet_set_mac_addr(struct net_device* netDev,
+         void* p)
+{
+ struct sockaddr* addr = p;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+  return -EADDRNOTAVAIL;
+
+ memcpy(netDev->dev_addr, addr->sa_data, netDev->addr_len);
+
+ return 0;
+}
+
+
+/* ndo_set_config: repurpose two ifmap fields as driver knobs --
+ * 'port' selects the collective virtual channel, 'io_addr' the
+ * collective route.  A zero value leaves the setting unchanged.
+ * Always returns 0. */
+static int bgnet_set_config(struct net_device* netDev,
+       struct ifmap* map)
+{
+ int rc = 0;
+ struct bgnet_dev* bgnet = netdev_priv(netDev);
+
+ /* Set this with ifconfig <interface> port <collective virtual channel> */
+ if (map->port)
+  bgnet->bgcol_channel = map->port;
+
+ /* Set this with ifconifg <interface> io_addr <collective route> */
+ if (map->base_addr)
+  bgnet->bgcol_route = map->base_addr;
+
+ return rc;
+}
+
+
+/* ndo_init (called from register_netdev): bind this netdev to the
+ * collective device, derive the MAC address and collective vector from
+ * the node personality, register the 'eth' and 'eth_reflector' link
+ * protocols, and finally enable collective interrupts.
+ * Returns 0 on success, -1 when no collective device exists.
+ * NOTE(review): returns -1 rather than a -Exxx errno -- confirm callers
+ * only test for non-zero. */
+static int bgnet_init(struct net_device *dev)
+{
+ struct bgnet_dev *bgnet = netdev_priv(dev);
+ TRACE("(>) bgnet_init") ;
+ bgnet->bgcol_route = 0 /*15*/;
+#define ETH_COL_CHANNEL 0
+ bgnet->bgcol_channel = 0 ;
+/* bgnet->bgcol_channel = (bgnet_personality.Block_Config & BGP_PERS_BLKCFG_IPOverCollectiveVC) ? 1 : 0; */
+/* bgnet->eth_bridge_vector = -1; */
+/* bgnet->link_protocol = BGLINK_P_NET; */
+/* bgnet->net_device = dev; */
+
+ bgnet->bgcol = bgcol_get_dev();
+ TRACE("(=) bgnet->bgcol=%p",bgnet->bgcol) ;
+
+ if (!bgnet->bgcol)
+  return -1;
+
+ bgnet->bgcol->bgnet_channel = bgnet->bgcol_channel ;
+/* bgnet->phandle_tree = 3; */
+/* bgnet->phandle_torus = 0; */
+/* // bgnet->tree_route = 15; // 15 is 'partition flood' */
+/* bgnet->tree_route = 0 ; // 0 is 'compute to IO' or 'IO to compute' */
+/* bgnet->tree_channel = BGNET_TREE_CHANNEL ; */
+/* bgnet->eth_mask = 0; */
+/* // bgnet->eth_bridge_vector = 0; // route through the I/O node? (personality.Network_Config.IONodeRank) */
+/* bgnet->eth_bridge_vector = personality.Network_Config.IOnodeRank; // route through the I/O node? (personality.Network_Config.IONodeRank) */
+ bgnet->eth_bridge_vector = bgnet_personality.Network_Config.IOnodeRank; /* route through the I/O node? (personality.Network_Config.IONodeRank) */
+ bgnet->bgcol_protocol = 1;
+ bgnet->bgcol_reflector_protocol = 2 ; /* CN requests reflection from ION */
+
+/* bgnet->i_am_ionode = ( personality.Network_Config.IOnodeRank == personality.Network_Config.Rank) ; */
+#if 0
+ p = get_property(np, "local-mac-address", NULL);
+ if (p == NULL) {
+  printk(KERN_ERR "%s: Can't find local-mac-address property\n",
+         np->full_name);
+  goto err;
+ }
+ memcpy(dev->dev_addr, p, 6);
+#endif
+ /* Synthesise a MAC: 00:80:46:<24-bit node rank>.  The word store below
+  * assumes dev_addr[2] is 4-byte aligned -- TODO confirm. */
+ dev->dev_addr[0] = 0x00;
+ dev->dev_addr[1] = 0x80;
+ *((unsigned*)(&dev->dev_addr[2])) = 0x46000000u | bgnet_personality.Network_Config.Rank; /* why 0x46yyyyyy ??? */
+
+ bgnet->bgcol_vector = *(unsigned int *)(&dev->dev_addr[2]);
+ bgnet->eth_local = bgnet->eth_mask & *(unsigned int *)&dev->dev_addr[0];
+
+ /* Only the first device registers the protocol handlers.  Note the
+  * matching list_add_rcu() is commented out, so bgnet_list stays empty
+  * and the lock is released below via the commented-out path -- the
+  * spin_unlock here is inside the original commented block; confirm
+  * lock balance if more devices are ever added. */
+ spin_lock(&bgnet_lock);
+ if (list_empty(&bgnet_list)) {
+  /* register with col */
+/* bgnet_lnk.lnk_proto = bgnet->link_protocol; */
+/* bgnet_lnk.receive_from_self = 0; */
+/* bgnet_lnk.col_rcv = col_receive; */
+/* bgnet_lnk.col_flush = col_flush; */
+/* bgnet_lnk.torus_rcv = torus_receive; */
+/* bglink_register_proto(&bgnet_lnk); */
+  bgnet->lnk.lnk_proto = bgnet->bgcol_protocol;
+  bgnet->lnk.col_rcv = bgnet_receive;
+  bgnet->lnk.col_rcv_trimmed = bgnet_receive_trimmed;
+  bgnet->lnk.private = dev;
+  bglink_register_proto(&bgnet->lnk);
+
+  bgnet->lnkreflect.lnk_proto = bgnet->bgcol_reflector_protocol;
+  bgnet->lnkreflect.col_rcv = col_reflect;
+  bgnet->lnkreflect.col_rcv_trimmed = col_reflect_trimmed;
+  bgnet->lnkreflect.private = dev;
+  bglink_register_proto(&bgnet->lnkreflect);
+
+  /* Hook for the tree interrupt handler to find the 'bgnet' */
+  bgnet->bgcol->bgnet = bgnet ;
+ }
+/* list_add_rcu(&bgnet->list, &bgnet_list); */
+/* */
+/* spin_unlock(&bgnet_lock); */
+/* */
+/* skb_queue_head_init(&bgnet->pending_skb_list); */
+ bgcol_enable_interrupts(bgnet->bgcol) ; /* Should be able to run tree interrupts now */
+
+
+ TRACE("(<) bgnet_init") ;
+ return 0;
+}
+
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+/* NAPI poll: drain the collective duplex engine.  Returns 0 (no work
+ * accounting against 'budget' is done here).
+ * NOTE(review): returning 0 without napi_complete() relies on
+ * bgcol_duplex_slih() managing NAPI state itself -- confirm. */
+static int bgnet_poll_napi(struct napi_struct * napi, int budget)
+{
+ TRACEN(k_t_napi,"(>) napi=%p budget%d",napi,budget) ;
+ bgcol_duplex_slih(0) ;
+ TRACEN(k_t_napi,"(<)") ;
+ return 0 ;
+}
+#endif
+
+#if defined(HAVE_NET_DEVICE_OPS)
+/* net_device_ops table for kernels with the ops structure; older kernels
+ * take the same callbacks via direct net_device fields (see module init). */
+static const struct net_device_ops netdev_ops = {
+ .ndo_change_mtu  = bgnet_change_mtu ,
+ .ndo_get_stats  = bgnet_get_stats ,
+ .ndo_start_xmit  = bgnet_start_xmit ,
+ .ndo_init  = bgnet_init ,
+ .ndo_uninit  = bgnet_uninit ,
+ .ndo_open  = bgnet_open ,
+ .ndo_stop  = bgnet_stop ,
+ .ndo_set_config  = bgnet_set_config ,
+ .ndo_set_mac_address = bgnet_set_mac_addr,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_poll_controller = bgnet_poll,
+#endif
+};
+#endif
+/* Module entry point: allocate the etherdev, read the node personality,
+ * initialise the collective layer, wire up the netdev callbacks, set
+ * default MTU/features, register the device (which invokes bgnet_init
+ * via ndo_init) and, under NAPI, attach and enable the poll handler.
+ * Returns 0 on success, -ENOMEM or -1 on failure. */
+static int __init
+bgnet_module_init(void)
+{
+ struct bgnet_dev *bgnet;
+ struct net_device *dev;
+
+ TRACEN(k_t_general, "(>) bgnet_module_init") ;
+ dev = alloc_etherdev(sizeof(struct bgnet_dev));
+ TRACEN(k_t_general, "(=) bgnet_module_init dev=%p", dev) ;
+ if (!dev)
+  return -ENOMEM;
+
+/* SET_MODULE_OWNER(dev); // Anachronism */
+
+ /* Read personality. */
+ bluegene_getPersonality((void*) &bgnet_personality, sizeof(bgnet_personality));
+ bgnet = (struct bgnet_dev*) netdev_priv(dev);
+ /* alloc_etherdev() already zeroes priv; this memset is belt-and-braces. */
+ memset(bgnet, 0, sizeof(*bgnet));
+ bgcol_module_init() ;
+/* bgnet_init(dev); */
+
+/* // Set the MAC address for this interface. */
+/* if (bluegene_isIONode()) { */
+/* unsigned char ipOctet2 = (bgnet_personality.Ethernet_Config.IPAddress.octet[13] + 1) & 0xfc; */
+/* */
+/* dev->dev_addr[0] = ipOctet2 | 2; */
+/* dev->dev_addr[1] = bgnet_personality.Ethernet_Config.IPAddress.octet[14]; */
+/* dev->dev_addr[2] = bgnet_personality.Ethernet_Config.IPAddress.octet[15]; */
+/* dev->dev_addr[3] = ((bgnet_personality.Network_Config.Rank >> 16) & 0x3f) | (ipOctet2 << 6); */
+/* dev->dev_addr[4] = (unsigned char) ((bgnet_personality.Network_Config.Rank >> 8)); */
+/* dev->dev_addr[5] = (unsigned char) bgnet_personality.Network_Config.Rank; */
+/* } else */
+/* memcpy(dev->dev_addr, bgnet_personality.Ethernet_Config.EmacID, sizeof(dev->dev_addr)); */
+
+#if defined(HAVE_NET_DEVICE_OPS)
+ dev->netdev_ops = &netdev_ops ;
+#else
+ dev->init = bgnet_init;
+ dev->uninit = bgnet_uninit;
+ dev->get_stats = bgnet_get_stats;
+ dev->hard_start_xmit = bgnet_start_xmit;
+ dev->change_mtu = bgnet_change_mtu;
+ dev->open = bgnet_open;
+ dev->stop = bgnet_stop;
+ dev->set_config = bgnet_set_config;
+ dev->set_mac_address = bgnet_set_mac_addr;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ dev->poll_controller = bgnet_poll;
+#endif
+#endif
+ dev->mtu = BGNET_DEFAULT_MTU;
+
+/* Tried turning checksum generation off, but this resulted in packets routed off the BGP not having checksums */
+/* and lack of interoperability with front-end nodes */
+/* (try CHECKSUM_PARTIAL above to see if the TOMAL will generate an IP checksum in this circumstance) */
+ dev->features = k_trust_collective_crc
+   ? (NETIF_F_HIGHDMA | NETIF_F_NO_CSUM)
+   : NETIF_F_HIGHDMA ;
+/* if( k_trust_collective_crc) */
+/* { */
+/* dev->features = NETIF_F_HIGHDMA | NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM ; */
+/* } */
+/* else */
+/* { */
+/* dev->features = NETIF_F_HIGHDMA ; */
+/* } */
+
+/* #if defined(TRUST_TREE_CRC) */
+/* dev->features = NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA ; */
+/* #endif */
+/* dev->features |= NETIF_F_NO_CSUM; */
+
+ TRACEN(k_t_general,"(=) dev->name=%s",
+   dev->name
+   ) ;
+ {
+  /* register_netdev() runs bgnet_init() via ndo_init before returning. */
+  int rc = register_netdev(dev) ;
+  TRACEN(k_t_general, "(=) bgnet_module_init register_netdev rc=%d", rc) ;
+  if( rc != 0 )
+   goto err;
+ }
+
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+ /* NOTE(review): NAPI is attached after register_netdev; confirm no RX
+  * can arrive in the window before napi_enable(). */
+ netif_napi_add(dev,&bgnet->napi, bgnet_poll_napi, k_collective_budget) ;
+ napi_enable(&bgnet->napi) ;
+#endif
+ /* increase header size to fit torus hardware header */
+/* if (bgnet->torus) */
+/* dev->hard_header_len += 16; */
+
+ if (bgnet->eth_bridge_vector != -1)
+  printk(KERN_INFO " bridge 0x%06x\n", bgnet->eth_bridge_vector);
+
+ TRACEN(k_t_general, "(<) bgnet_module_init rc=0") ;
+ return 0;
+
+ err:
+ free_netdev(dev);
+ TRACEN(k_t_general, "(<) bgnet_module_init err rc=-1") ;
+ return -1;
+}
+
+
+/* static void __exit */
+/* bgnet_module_exit (void) */
+/* { */
+/* return; */
+/* } */
+
+module_init(bgnet_module_init);
+/* module_exit(bgnet_module_exit); */
diff --git a/drivers/net/bgp_collective/bgnet.h b/drivers/net/bgp_collective/bgnet.h
new file mode 100644
index 00000000000000..48748d0a2425ae
--- /dev/null
+++ b/drivers/net/bgp_collective/bgnet.h
@@ -0,0 +1,152 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Volkmar Uhlig <vuhlig@us.ibm.com>
+ * Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: definitions for BG networks
+ *
+ *
+ ********************************************************************/
+
+#ifndef __DRIVERS__NET__BLUEGENE__BGNET_H__
+#define __DRIVERS__NET__BLUEGENE__BGNET_H__
+
+/* static inline unsigned int BG_IRQ(unsigned int group, unsigned int irq) */
+/* { */
+/* return ((group) << 5 | (irq)) ; */
+/* } */
+/* #define BG_IRQ(group, irq) ((group) << 5 | (irq)) */
+
+
+/**********************************************************************
+ * link layer
+ **********************************************************************/
+
+/* enum { */
+/* BGNET_P_ETH0 = 1 , */
+/* BGNET_P_ETH1 = 2 , */
+/* BGNET_P_ETH2 = 3 , */
+/* BGNET_P_ETH3 = 4 , */
+/* BGNET_P_ETH4 = 5 , */
+/* BGNET_P_ETH5 = 6 , */
+/* BGNET_P_ETH6 = 7 , */
+/* BGNET_P_ETH7 = 8 , */
+/* BGNET_P_ETH8 = 9 , */
+/* BGNET_P_LAST_ETH = BGNET_P_ETH8 , */
+/* BGNET_P_CONSOLE = 20 */
+/* }; */
+/* //#define BGNET_P_ETH0 1 */
+/* //#define BGNET_P_ETH1 2 */
+/* //#define BGNET_P_ETH2 3 */
+/* //#define BGNET_P_ETH3 4 */
+/* //#define BGNET_P_ETH4 5 */
+/* //#define BGNET_P_ETH5 6 */
+/* //#define BGNET_P_ETH6 7 */
+/* //#define BGNET_P_ETH7 8 */
+/* //#define BGNET_P_ETH8 9 */
+/* //#define BGNET_P_LAST_ETH BGNET_P_ETH8 */
+/* // */
+/* //#define BGNET_P_CONSOLE 20 */
+
+/* Facility for using multiple cores in support of 'collective', only make it happen if multiple cores are available ... */
+#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC)
+#define COLLECTIVE_TREE_AFFINITY
+#endif
+
+#if defined(COLLECTIVE_TREE_AFFINITY)
+/* On IO nodes, 10gE will be using core 0. On Compute nodes, torus will be using core 2. So exploit cores 1 and 3 for collective ... */
+enum {
+ k_TreeAffinityCPU = 1 ,
+ k_WorkqueueDeliveryCPU = 3
+};
+#else
+enum {
+ k_TreeAffinityCPU = 0 ,
+ k_WorkqueueDeliveryCPU = 0
+};
+#endif
+
+
+enum {
+ BGNET_FRAG_MTU = 240 ,
+/* BGNET_MAX_MTU = BGNET_FRAG_MTU * 128 , */
+ BGNET_DEFAULT_MTU = ETH_DATA_LEN
+};
+/* #define BGNET_FRAG_MTU 240 */
+/* #define BGNET_MAX_MTU (BGNET_FRAG_MTU * 128) */
+/* //#define BGNET_DEFAULT_MTU (BGNET_FRAG_MTU * 30 - 12) */
+/* #define BGNET_DEFAULT_MTU ETH_DATA_LEN */
+
+/* // Which bgcol channel to use for the driver */
+/* #define BGNET_TREE_CHANNEL 0 */
+
+/* Compile-time tuning knobs for the driver. */
+enum {
+ k_trust_collective_crc =
+#if defined(BGP_COLLECTIVE_IP_CHECKSUM)
+ 0
+#else
+ 1
+#endif
+ /* Whether the IP layer should trust the BGP hardware CRC on the collective network */
+};
+
+enum {
+ k_collective_budget = 1000 /* Number of frames we are willing to collect from the tree before we 'yield' */
+};
+
+enum {
+ k_deliver_via_workqueue = 1 /* Whether to deliver via a work queue (on another core) */
+};
+/* Per-interface private state, stored in netdev_priv(dev). */
+struct bgnet_dev
+{
+ struct bg_col *bgcol;   /* underlying collective device */
+ unsigned int bgcol_route;  /* collective route (set via ifconfig io_addr) */
+ unsigned int bgcol_channel;  /* collective virtual channel (ifconfig port) */
+ unsigned short bgcol_protocol;  /* link-protocol id for normal frames */
+ unsigned short bgcol_reflector_protocol ; /* link-protocol id for ION reflection */
+ unsigned int bgcol_vector;  /* our 0x46-prefixed node vector (low 4 MAC bytes) */
+ unsigned int eth_mask;
+ unsigned int eth_local;
+ unsigned int eth_bridge_vector;  /* IO-node rank used as bridge route */
+ struct bglink_proto lnk;  /* handler registration for bgcol_protocol */
+ struct bglink_proto lnkreflect;  /* handler registration for the reflector */
+ struct net_device_stats stats;
+ u32 phandle_bgcol;
+ u32 phandle_torus;
+ struct sk_buff_head xmit_list ; /* List of skb's to be sent */
+#if defined(CONFIG_BGP_COLLECTIVE_NAPI)
+ struct napi_struct napi ;
+#endif
+/* unsigned int i_am_ionode ; */
+};
+
+/* Map an Ethernet MAC address to a collective-network key: broadcast
+ * addresses become ~0U (all-ones), otherwise the low three address
+ * octets form a 24-bit key. */
+extern inline unsigned int eth_to_key(char *addr)
+{
+ /* Bug fix: read the octets as unsigned.  With plain char (signed on
+  * many ABIs) an octet >= 0x80 sign-extends before the shift/OR and
+  * corrupts the key's upper bits. */
+ const unsigned char *a = (const unsigned char *)addr;
+ unsigned int key;
+ if (is_broadcast_ether_addr(addr))
+  key = ~0U;
+ else
+  key = (a[3] << 16) | (a[4] << 8) | (a[5] << 0);
+ return key;
+}
+
+
+/* extern struct list_head bglink_proto; */
+/* extern struct bglink_proto bgnet_eth; */
+
+#endif /* !__DRIVERS__NET__BLUEGENE__BGNIC_H__ */
diff --git a/drivers/net/bgp_collective/bgp_dcr.h b/drivers/net/bgp_collective/bgp_dcr.h
new file mode 100644
index 00000000000000..f1f60c24436dc1
--- /dev/null
+++ b/drivers/net/bgp_collective/bgp_dcr.h
@@ -0,0 +1,1041 @@
+/*********************************************************************
+ *
+ * Description: BGP DCR map (copied from bpcore)
+ *
+ * Copyright (c) 2007, 2008 International Business Machines
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ ********************************************************************/
+
+#ifndef _BGP_DCR_H_
+#define _BGP_DCR_H_
+
+#define _BN(b) ((1<<(31-(b))))
+#define _B1(b,x) (((x)&0x1)<<(31-(b)))
+#define _B2(b,x) (((x)&0x3)<<(31-(b)))
+#define _B3(b,x) (((x)&0x7)<<(31-(b)))
+#define _B4(b,x) (((x)&0xF)<<(31-(b)))
+#define _B5(b,x) (((x)&0x1F)<<(31-(b)))
+#define _B6(b,x) (((x)&0x3F)<<(31-(b)))
+#define _B7(b,x) (((x)&0x7F)<<(31-(b)))
+#define _B8(b,x) (((x)&0xFF)<<(31-(b)))
+#define _B9(b,x) (((x)&0x1FF)<<(31-(b)))
+#define _B10(b,x) (((x)&0x3FF)<<(31-(b)))
+#define _B11(b,x) (((x)&0x7FF)<<(31-(b)))
+#define _B12(b,x) (((x)&0xFFF)<<(31-(b)))
+#define _B13(b,x) (((x)&0x1FFF)<<(31-(b)))
+#define _B14(b,x) (((x)&0x3FFF)<<(31-(b)))
+#define _B15(b,x) (((x)&0x7FFF)<<(31-(b)))
+#define _B16(b,x) (((x)&0xFFFF)<<(31-(b)))
+#define _B17(b,x) (((x)&0x1FFFF)<<(31-(b)))
+#define _B18(b,x) (((x)&0x3FFFF)<<(31-(b)))
+#define _B19(b,x) (((x)&0x7FFFF)<<(31-(b)))
+#define _B20(b,x) (((x)&0xFFFFF)<<(31-(b)))
+#define _B21(b,x) (((x)&0x1FFFFF)<<(31-(b)))
+#define _B22(b,x) (((x)&0x3FFFFF)<<(31-(b)))
+#define _B23(b,x) (((x)&0x7FFFFF)<<(31-(b)))
+#define _B24(b,x) (((x)&0xFFFFFF)<<(31-(b)))
+#define _B25(b,x) (((x)&0x1FFFFFF)<<(31-(b)))
+#define _B26(b,x) (((x)&0x3FFFFFF)<<(31-(b)))
+#define _B27(b,x) (((x)&0x7FFFFFF)<<(31-(b)))
+#define _B28(b,x) (((x)&0xFFFFFFF)<<(31-(b)))
+#define _B29(b,x) (((x)&0x1FFFFFFF)<<(31-(b)))
+#define _B30(b,x) (((x)&0x3FFFFFFF)<<(31-(b)))
+#define _B31(b,x) (((x)&0x7FFFFFFF)<<(31-(b)))
+
+#if 0
+#define _BGP_DCR_BIC (0x000) /* 0x000-0x1ff: BIC (includes MCCU functionality) */
+#define _BGP_DCR_BIC_END (_BGP_DCR_BIC + 0x1FF) /* 0x1ff: BIC (includes MCCU functionality) */
+
+#define _BGP_DCR_SERDES (0x200) /* 0x200-0x3ff: Serdes Config */
+#define _BGP_DCR_SERDES_END (_BGP_DCR_SERDES + 0x1FF) /* 0x3ff: Serdes Config End */
+
+#define _BGP_DCR_TEST (0x400) /* 0x400-0x47f: Test Interface */
+#define _BGP_DCR_TEST_END (_BGP_DCR_TEST + 0x07F) /* 0x400-0x47f: Test Interface End */
+
+#define _BGP_DCR_L30 (0x500) /* 0x500-0x53f: L3-Cache 0 */
+#define _BGP_DCR_L30_END (_BGP_DCR_L30 + 0x03F) /* 0x53f: L3-Cache 0 End */
+
+#define _BGP_DCR_L31 (0x540) /* 0x540-0x57f: L3-Cache 1 */
+#define _BGP_DCR_L31_END (_BGP_DCR_L31 + 0x03F) /* 0x57f: L3-Cache 1 End */
+
+#define _BGP_DCR_XAUI (0x580) /* 0x580-0x5bf: XAUI config */
+#define _BGP_DCR_XAUI_END (_BGP_DCR_XAUI + 0x03F) /* 0x5bf: XAUI config End */
+
+#define _BGP_DCR_SRAM (0x610) /* 0x610-0x61f: SRAM unit (Includes Lockbox functionality) */
+#define _BGP_DCR_SRAM_END (_BGP_DCR_SRAM + 0x00F) /* 0x61f: SRAM unit (Includes Lockbox functionality) */
+
+#define _BGP_DCR_DEVBUS (0x620) /* 0x620-0x62f: DevBus Arbiter */
+#define _BGP_DCR_DEVBUS_END (_BGP_DCR_DEVBUS + 0x00F) /* 0x62f: DevBus Arbiter End */
+
+#define _BGP_DCR_NETBUS (0x630) /* 0x630-0x63f: NetBus Arbiter */
+#define _BGP_DCR_NETBUS_END (_BGP_DCR_NETBUS + 0x00F) /* 0x63f: NetBus Arbiter End */
+
+#define _BGP_DCR_DMAARB (0x640) /* 0x640-0x64f: DMA arbiter (former PLB slave) */
+#define _BGP_DCR_DMAARB_END (_BGP_DCR_DMAARB + 0x00F) /* 0x64f: DMA arbiter (former PLB slave) End */
+
+#define _BGP_DCR_DCRARB (0x650) /* 0x650-0x65f: DCR arbiter */
+#define _BGP_DCR_DCRARB_END (_BGP_DCR_DCRARB + 0x00F) /* 0x65f: DCR arbiter End */
+
+#define _BGP_DCR_GLOBINT (0x660) /* 0x660-0x66F: Global Interrupts */
+#define _BGP_DCR_GLOBINT_END (_BGP_DCR_GLOBINT + 0x00F) /* 0x66F: Global Interrupts End */
+
+#define _BGP_DCR_CLOCKSTOP (0x670) /* 0x670-0x67F: Clock Stop */
+#define _BGP_DCR_CLOCKSTOP_END (_BGP_DCR_CLOCKSTOP + 0x00F) /* 0x67F: Clock Stop End */
+
+#define _BGP_DCR_ENVMON (0x680) /* 0x680-0x68F: Environmental Monitor */
+#define _BGP_DCR_ENVMON_END (_BGP_DCR_ENVMON + 0x00F) /* 0x68F: Env Mon End */
+
+#define _BGP_DCR_FPU (0x700) /* 0x700-0x77f: Hummer3 00/01/10/11 */
+#define _BGP_DCR_FPU_END (_BGP_DCR_FPU + 0x07F) /* 0x77f: Hummer3 00/01/10/11 End */
+
+#define _BGP_DCR_L2 (0x780) /* 0x780-0x7ff: L2-Cache 00/01/10/11 */
+#define _BGP_DCR_L2_END (_BGP_DCR_L2 + 0x07F) /* 0x7ff: L2-Cache 00/01/10/11 End */
+
+#define _BGP_DCR_SNOOP (0x800) /* 0x800-0xbff: Snoop 00/01/10/11 */
+#define _BGP_DCR_SNOOP0 (0x800) /* 0x800-0x8ff: Snoop 00 */
+#define _BGP_DCR_SNOOP1 (0x900) /* 0x900-0x9ff: Snoop 01 */
+#define _BGP_DCR_SNOOP2 (0xA00) /* 0xa00-0xaff: Snoop 10 */
+#define _BGP_DCR_SNOOP3 (0xB00) /* 0xb00-0xbff: Snoop 11 */
+#define _BGP_DCR_SNOOP_END (_BGP_DCR_SNOOP + 0x3FF) /* 0xbff: Snoop 00/01/10/11 End */
+
+#define _BGP_DCR_COL (0xc00) /* 0xc00-0xc7f: Tree */
+#define _BGP_DCR_COL_END (_BGP_DCR_COL + 0x07F) /* 0xc7f: Tree End */
+
+#define _BGP_DCR_TORUS (0xc80) /* 0xc80-0xcff: Torus */
+#define _BGP_DCR_TORUS_END (_BGP_DCR_TORUS + 0x07F) /* 0xcff: Torus End */
+
+#define _BGP_DCR_DMA (0xd00) /* 0xd00-0xdff: DMA */
+#define _BGP_DCR_DMA_END (_BGP_DCR_DMA + 0x0FF) /* 0xdff: DMA End */
+
+#define _BGP_DCR_DDR0 (0xe00) /* 0xe00-0xeff: DDR controller 0 */
+#define _BGP_DCR_DDR0_END (_BGP_DCR_DDR0 + 0x0FF) /* 0xeff: DDR controller 0 End */
+
+#define _BGP_DCR_DDR1 (0xf00) /* 0xf00-0xfff: DDR controller 1 */
+#define _BGP_DCR_DDR1_END (_BGP_DCR_DDR1 + 0x0FF) /* 0xfff: DDR controller 1 End */
+
+#endif
+
+/*
+ * Tree
+ */
+
+#define _BGP_TRx_DI (0x00) /* Offset from Tree VCx for Data Injection (WO,Quad) */
+#define _BGP_TRx_HI (0x10) /* Offset from Tree VCx for Header Injection (WO,Word) */
+#define _BGP_TRx_DR (0x20) /* Offset from Tree VCx for Data Reception (RO,Quad) */
+#define _BGP_TRx_HR (0x30) /* Offset from Tree VCx for Header Reception (RO,Word) */
+#define _BGP_TRx_Sx (0x40) /* Offset from Tree VCx for Status (RO,Word) */
+#define _BGP_TRx_SO (0x50) /* Offset from Tree VCx for Status of Other VC (RO,Word) */
+
+/* Virtual Addresses for Tree VC0 */
+#define _BGP_TR0_DI (_BGP_VA_COL0 | _BGP_TRx_DI)
+#define _BGP_TR0_HI (_BGP_VA_COL0 | _BGP_TRx_HI)
+#define _BGP_TR0_DR (_BGP_VA_COL0 | _BGP_TRx_DR)
+#define _BGP_TR0_HR (_BGP_VA_COL0 | _BGP_TRx_HR)
+#define _BGP_TR0_S0 (_BGP_VA_COL0 | _BGP_TRx_Sx)
+#define _BGP_TR0_S1 (_BGP_VA_COL0 | _BGP_TRx_SO)
+
+/* Virtual Addresses for Tree VC1 */
+#define _BGP_TR1_DI (_BGP_VA_COL1 | _BGP_TRx_DI)
+#define _BGP_TR1_HI (_BGP_VA_COL1 | _BGP_TRx_HI)
+#define _BGP_TR1_DR (_BGP_VA_COL1 | _BGP_TRx_DR)
+#define _BGP_TR1_HR (_BGP_VA_COL1 | _BGP_TRx_HR)
+#define _BGP_TR1_S1 (_BGP_VA_COL1 | _BGP_TRx_Sx)
+#define _BGP_TR1_S0 (_BGP_VA_COL1 | _BGP_TRx_SO)
+
+/* Packet Payload: fixed size for all Tree packets */
+#define _BGP_COL_PKT_MAX_BYTES (256) /* bytes in a tree packet */
+#define _BGP_COL_PKT_MAX_SHORT (128)
+#define _BGP_COL_PKT_MAX_LONG (64)
+#define _BGP_COL_PKT_MAX_LONGLONG (32)
+#define _BGP_COL_PKT_MAX_QUADS (16) /* quads in a tree packet */
+
+
+/* Packet header */
+#define _BGP_TR_HDR_CLASS(x) _B4( 3,x) /* Packet class (virtual tree) */
+#define _BGP_TR_HDR_P2P _BN( 4) /* Point-to-point enable */
+#define _BGP_TR_HDR_IRQ _BN( 5) /* Interrupt request (at receiver) enable */
+#define _BGP_TR_HDR_OPCODE(x) _B3( 8,x) /* ALU opcode */
+#define _BGP_TR_OP_NONE 0x0 /* No operand. Use for ordinary routed packets. */
+#define _BGP_TR_OP_OR 0x1 /* Bitwise logical OR. */
+#define _BGP_TR_OP_AND 0x2 /* Bitwise logical AND. */
+#define _BGP_TR_OP_XOR 0x3 /* Bitwise logical XOR. */
+#define _BGP_TR_OP_MAX 0x5 /* Unsigned integer maximum. */
+#define _BGP_TR_OP_ADD 0x6 /* Unsigned integer addition. */
+#define _BGP_TR_HDR_OPSIZE(x) _B7(15,x) /* Operand size (# of 16-bit words minus 1) */
+#define _BGP_TR_HDR_TAG(x) _B14(29,x) /* User-specified tag (for ordinary routed packets only) */
+#define _BGP_TR_HDR_NADDR(x) _B24(29,x) /* Target address (for P2P packets only) */
+#define _BGP_TR_HDR_CSUM(x) _B2(31,x) /* Injection checksum mode */
+#define _BGP_TR_CSUM_NONE 0x0 /* Do not include packet in checksums. */
+#define _BGP_TR_CSUM_SOME 0x1 /* Include header in header checksum. Include all but */
+ /* first quadword in payload checksum. */
+#define _BGP_TR_CSUM_CFG 0x2 /* Include header in header checksum. Include all but */
+ /* specified number of 16-bit words in payload checksum. */
+#define _BGP_TR_CSUM_ALL 0x3 /* Include entire packet in checksums. */
+
+/* Packet status */
+#define _BGP_TR_STAT_IPY_CNT(x) _B8( 7,x) /* Injection payload qword count */
+#define _BGP_TR_STAT_IHD_CNT(x) _B4(15,x) /* Injection header word count */
+#define _BGP_TR_STAT_RPY_CNT(x) _B8(23,x) /* Reception payload qword count */
+#define _BGP_TR_STAT_IRQ _BN(27) /* One or more reception headers with IRQ bit set */
+#define _BGP_TR_STAT_RHD_CNT(x) _B4(31,x) /* Reception header word count */
+
+/* Tree Map of DCR Groupings */
+#define _BGP_DCR_TR_CLASS (_BGP_DCR_COL + 0x00) /* Class Definition Registers (R/W) */
+#define _BGP_DCR_TR_DMA (_BGP_DCR_COL + 0x0C) /* Network Port Diagnostic Memory Access Registers (R/W) */
+#define _BGP_DCR_TR_ARB (_BGP_DCR_COL + 0x10) /* Arbiter Control Registers (R/W) */
+#define _BGP_DCR_TR_CH0 (_BGP_DCR_COL + 0x20) /* Channel 0 Control Registers (R/W) */
+#define _BGP_DCR_TR_CH1 (_BGP_DCR_COL + 0x28) /* Channel 1 Control Registers (R/W) */
+#define _BGP_DCR_TR_CH2 (_BGP_DCR_COL + 0x30) /* Channel 2 Control Registers (R/W) */
+#define _BGP_DCR_TR_GLOB (_BGP_DCR_COL + 0x40) /* Global Registers (R/W) */
+#define _BGP_DCR_TR_REC (_BGP_DCR_COL + 0x44) /* Processor Reception Registers (R/W) */
+#define _BGP_DCR_TR_INJ (_BGP_DCR_COL + 0x48) /* Processor Injection Registers (R/W) */
+#define _BGP_DCR_TR_LCRC (_BGP_DCR_COL + 0x50) /* Link CRC's */
+#define _BGP_DCR_TR_ERR (_BGP_DCR_COL + 0x60) /* Internal error counters */
+
+
+/* Tree Class Registers */
+/* Note: each route descriptor register contains two class descriptors. "LO" will refer to the lower-numbered */
+/* of the two and "HI" will refer to the higher numbered. */
+#define _BGP_DCR_TR_CLASS_RDR0 (_BGP_DCR_TR_CLASS + 0x00) /* CLASS: Route Descriptor Register for classes 0, 1 */
+#define _BGP_DCR_TR_CLASS_RDR1 (_BGP_DCR_TR_CLASS + 0x01) /* CLASS: Route Descriptor Register for classes 2, 3 */
+#define _BGP_DCR_TR_CLASS_RDR2 (_BGP_DCR_TR_CLASS + 0x02) /* CLASS: Route Descriptor Register for classes 4, 5 */
+#define _BGP_DCR_TR_CLASS_RDR3 (_BGP_DCR_TR_CLASS + 0x03) /* CLASS: Route Descriptor Register for classes 6, 7 */
+#define _BGP_DCR_TR_CLASS_RDR4 (_BGP_DCR_TR_CLASS + 0x04) /* CLASS: Route Descriptor Register for classes 8, 9 */
+#define _BGP_DCR_TR_CLASS_RDR5 (_BGP_DCR_TR_CLASS + 0x05) /* CLASS: Route Descriptor Register for classes 10, 11 */
+#define _BGP_DCR_TR_CLASS_RDR6 (_BGP_DCR_TR_CLASS + 0x06) /* CLASS: Route Descriptor Register for classes 12, 13 */
+#define _BGP_DCR_TR_CLASS_RDR7 (_BGP_DCR_TR_CLASS + 0x07) /* CLASS: Route Descriptor Register for classes 14, 15 */
+#define _TR_CLASS_RDR_LO_SRC2 _BN( 1) /* Class low, source channel 2 */
+#define _TR_CLASS_RDR_LO_SRC1 _BN( 2) /* Class low, source channel 1 */
+#define _TR_CLASS_RDR_LO_SRC0 _BN( 3) /* Class low, source channel 0 */
+#define _TR_CLASS_RDR_LO_TGT2 _BN( 5) /* Class low, target channel 2 */
+#define _TR_CLASS_RDR_LO_TGT1 _BN( 6) /* Class low, target channel 1 */
+#define _TR_CLASS_RDR_LO_TGT0 _BN( 7) /* Class low, target channel 0 */
+#define _TR_CLASS_RDR_LO_SRCL _BN(14) /* Class low, source local client (injection) */
+#define _TR_CLASS_RDR_LO_TGTL _BN(15) /* Class low, target local client (reception) */
+#define _TR_CLASS_RDR_HI_SRC2 _BN(17) /* Class high, source channel 2 */
+#define _TR_CLASS_RDR_HI_SRC1 _BN(18) /* Class high, source channel 1 */
+#define _TR_CLASS_RDR_HI_SRC0 _BN(19) /* Class high, source channel 0 */
+#define _TR_CLASS_RDR_HI_TGT2 _BN(21) /* Class high, target channel 2 */
+#define _TR_CLASS_RDR_HI_TGT1 _BN(22) /* Class high, target channel 1 */
+#define _TR_CLASS_RDR_HI_TGT0 _BN(23) /* Class high, target channel 0 */
+#define _TR_CLASS_RDR_HI_SRCL _BN(30) /* Class high, source local client (injection) */
+#define _TR_CLASS_RDR_HI_TGTL _BN(31) /* Class high, target local client (reception) */
+#define _BGP_DCR_TR_CLASS_ISRA (_BGP_DCR_TR_CLASS + 0x08) /* CLASS: Bits 0-31 of 64-bit idle pattern */
+#define _BGP_DCR_TR_CLASS_ISRB (_BGP_DCR_TR_CLASS + 0x09) /* CLASS: Bits 32-63 of 64-bit idle pattern */
+
+/* Tree Network Port Diagnostic Memory Access Registers */
+/* Note: Diagnostic access to processor injection and reception fifos is through TR_REC and TR_INJ registers. */
+#define _BGP_DCR_TR_DMA_DMAA (_BGP_DCR_TR_DMA + 0x00) /* DMA: Diagnostic SRAM address */
+#define _TR_DMA_DMAA_TGT(x) _B3(21,x) /* Target */
+#define _TR_DMAA_TGT_RCV0 0x0 /* Channel 0 receiver */
+#define _TR_DMAA_TGT_RCV1 0x1 /* Channel 1 receiver */
+#define _TR_DMAA_TGT_RCV2 0x2 /* Channel 2 receiver */
+#define _TR_DMAA_TGT_SND0 0x4 /* Channel 0 sender */
+#define _TR_DMAA_TGT_SND1 0x5 /* Channel 1 sender */
+#define _TR_DMAA_TGT_SND2 0x6 /* Channel 2 sender */
+#define _TR_DMA_DMAA_VC(x) _B1(22,x) /* Virtual channel */
+#define _TR_DMA_DMAA_PCKT(x) _B2(24,x) /* Packet number */
+#define _TR_DMA_DMAA_WORD(x) _B7(31,x) /* Word offset within packet */
+#define _BGP_DCR_TR_DMA_DMAD (_BGP_DCR_TR_DMA + 0x01) /* DMA: Diagnostic SRAM data */
+#define _BGP_DCR_TR_DMA_DMADI (_BGP_DCR_TR_DMA + 0x02) /* DMA: Diagnostic SRAM data with address increment */
+#define _TR_DMA_DMAD_ECC(x) _B6(15,x) /* ECC */
+#define _TR_DMA_DMAD_DATA(x) _B16(31,x) /* Data */
+#define _BGP_DCR_TR_DMA_DMAH (_BGP_DCR_TR_DMA + 0x03) /* DMA: Diagnostic header access */
+
+/* Tree Arbiter Control Registers */
+#define _BGP_DCR_TR_ARB_RCFG (_BGP_DCR_TR_ARB + 0x00) /* ARB: General router configuration */
+#define _TR_ARB_RCFG_SRC00 _BN( 0) /* Disable source channel 0, VC0 */
+#define _TR_ARB_RCFG_SRC01 _BN( 1) /* Disable source channel 0, VC1 */
+#define _TR_ARB_RCFG_TGT00 _BN( 2) /* Disable target channel 0, VC0 */
+#define _TR_ARB_RCFG_TGT01 _BN( 3) /* Disable target channel 0, VC1 */
+#define _TR_ARB_RCFG_SRC10 _BN( 4) /* Disable source channel 1, VC0 */
+#define _TR_ARB_RCFG_SRC11 _BN( 5) /* Disable source channel 1, VC1 */
+#define _TR_ARB_RCFG_TGT10 _BN( 6) /* Disable target channel 1, VC0 */
+#define _TR_ARB_RCFG_TGT11 _BN( 7) /* Disable target channel 1, VC1 */
+#define _TR_ARB_RCFG_SRC20 _BN( 8) /* Disable source channel 2, VC0 */
+#define _TR_ARB_RCFG_SRC21 _BN( 9) /* Disable source channel 2, VC1 */
+#define _TR_ARB_RCFG_TGT20 _BN(10) /* Disable target channel 2, VC0 */
+#define _TR_ARB_RCFG_TGT21 _BN(11) /* Disable target channel 2, VC1 */
+#define _TR_ARB_RCFG_LB2 _BN(25) /* Channel 2 loopback enable */
+#define _TR_ARB_RCFG_LB1 _BN(26) /* Channel 1 loopback enable */
+#define _TR_ARB_RCFG_LB0 _BN(27) /* Channel 0 loopback enable */
+#define _TR_ARB_RCFG_TOM(x) _B2(29,x) /* Timeout mode */
+#define _TR_RCFG_TOM_NONE 0x0 /* Disable. */
+#define _TR_RCFG_TOM_NRML 0x1 /* Normal mode, irq enabled. */
+#define _TR_RCFG_TOM_WD 0x2 /* Watchdog mode, irq enabled. */
+#define _TR_ARB_RCFG_MAN _BN(30) /* Manual mode (router is disabled). */
+#define _TR_ARB_RCFG_RST _BN(31) /* Full arbiter reset. */
+#define _BGP_DCR_TR_ARB_RTO (_BGP_DCR_TR_ARB + 0x01) /* ARB: 32 MSBs of router timeout value */
+#define _BGP_DCR_TR_ARB_RTIME (_BGP_DCR_TR_ARB + 0x02) /* ARB: Value of router timeout counter */
+#define _BGP_DCR_TR_ARB_RSTAT (_BGP_DCR_TR_ARB + 0x03) /* ARB: General router status */
+#define _TR_ARB_RSTAT_REQ20 _BN( 0) /* Packet available in channel 2, VC0 */
+#define _TR_ARB_RSTAT_REQ10 _BN( 1) /* Packet available in channel 1, VC0 */
+#define _TR_ARB_RSTAT_REQ00 _BN( 2) /* Packet available in channel 0, VC0 */
+#define _TR_ARB_RSTAT_REQP0 _BN( 3) /* Packet available in local client, VC0 */
+#define _TR_ARB_RSTAT_REQ21 _BN( 4) /* Packet available in channel 2, VC1 */
+#define _TR_ARB_RSTAT_REQ11 _BN( 5) /* Packet available in channel 1, VC1 */
+#define _TR_ARB_RSTAT_REQ01 _BN( 6) /* Packet available in channel 0, VC1 */
+#define _TR_ARB_RSTAT_REQP1 _BN( 7) /* Packet available in local client, VC1 */
+#define _TR_ARB_RSTAT_FUL20 _BN( 8) /* Channel 2, VC0 is full */
+#define _TR_ARB_RSTAT_FUL10 _BN( 9) /* Channel 1, VC0 is full */
+#define _TR_ARB_RSTAT_FUL00 _BN(10) /* Channel 0, VC0 is full */
+#define _TR_ARB_RSTAT_FULP0 _BN(11) /* Local client, VC0 is full */
+#define _TR_ARB_RSTAT_FUL21 _BN(12) /* Channel 2, VC1 is full */
+#define _TR_ARB_RSTAT_FUL11 _BN(13) /* Channel 1, VC1 is full */
+#define _TR_ARB_RSTAT_FUL01 _BN(14) /* Channel 0, VC1 is full */
+#define _TR_ARB_RSTAT_FULP1 _BN(15) /* Local client, VC1 is full */
+#define _TR_ARB_RSTAT_MAT20 _BN(16) /* Channel 2, VC0 is mature */
+#define _TR_ARB_RSTAT_MAT10 _BN(17) /* Channel 1, VC0 is mature */
+#define _TR_ARB_RSTAT_MAT00 _BN(18) /* Channel 0, VC0 is mature */
+#define _TR_ARB_RSTAT_MATP0 _BN(19) /* Local client, VC0 is mature */
+#define _TR_ARB_RSTAT_MAT21 _BN(20) /* Channel 2, VC1 is mature */
+#define _TR_ARB_RSTAT_MAT11 _BN(21) /* Channel 1, VC1 is mature */
+#define _TR_ARB_RSTAT_MAT01 _BN(22) /* Channel 0, VC1 is mature */
+#define _TR_ARB_RSTAT_MATP1 _BN(23) /* Local client, VC1 is mature */
+#define _TR_ARB_RSTAT_BSY20 _BN(24) /* Channel 2, VC0 is busy */
+#define _TR_ARB_RSTAT_BSY10 _BN(25) /* Channel 1, VC0 is busy */
+#define _TR_ARB_RSTAT_BSY00 _BN(26) /* Channel 0, VC0 is busy */
+#define _TR_ARB_RSTAT_BSYP0 _BN(27) /* Local client, VC0 is busy */
+#define _TR_ARB_RSTAT_BSY21 _BN(28) /* Channel 2, VC1 is busy */
+#define _TR_ARB_RSTAT_BSY11 _BN(29) /* Channel 1, VC1 is busy */
+#define _TR_ARB_RSTAT_BSY01 _BN(30) /* Channel 0, VC1 is busy */
+#define _TR_ARB_RSTAT_BSYP1 _BN(31) /* Local client, VC1 is busy */
+#define _BGP_DCR_TR_ARB_HD00 (_BGP_DCR_TR_ARB + 0x04) /* ARB: Next header, channel 0, VC0 */
+#define _BGP_DCR_TR_ARB_HD01 (_BGP_DCR_TR_ARB + 0x05) /* ARB: Next header, channel 0, VC1 */
+#define _BGP_DCR_TR_ARB_HD10 (_BGP_DCR_TR_ARB + 0x06) /* ARB: Next header, channel 1, VC0 */
+#define _BGP_DCR_TR_ARB_HD11 (_BGP_DCR_TR_ARB + 0x07) /* ARB: Next header, channel 1, VC1 */
+#define _BGP_DCR_TR_ARB_HD20 (_BGP_DCR_TR_ARB + 0x08) /* ARB: Next header, channel 2, VC0 */
+#define _BGP_DCR_TR_ARB_HD21 (_BGP_DCR_TR_ARB + 0x09) /* ARB: Next header, channel 2, VC1 */
+#define _BGP_DCR_TR_ARB_HDI0 (_BGP_DCR_TR_ARB + 0x0A) /* ARB: Next header, injection, VC0 */
+#define _BGP_DCR_TR_ARB_HDI1 (_BGP_DCR_TR_ARB + 0x0B) /* ARB: Next header, injection, VC1 */
+#define _BGP_DCR_TR_ARB_FORCEC (_BGP_DCR_TR_ARB + 0x0C) /* ARB: Force control for manual mode */
+#define _TR_ARB_FORCEC_CH0 _BN( 0) /* Channel 0 is a target */
+#define _TR_ARB_FORCEC_CH1 _BN( 1) /* Channel 1 is a target */
+#define _TR_ARB_FORCEC_CH2 _BN( 2) /* Channel 2 is a target */
+#define _TR_ARB_FORCEC_P _BN( 3) /* Local client is a target */
+#define _TR_ARB_FORCEC_ALU _BN( 4) /* ALU is a target */
+#define _TR_ARB_FORCEC_RT _BN( 5) /* Force route immediately */
+#define _TR_ARB_FORCEC_STK _BN( 6) /* Sticky route: always force route */
+#define _BGP_DCR_TR_ARB_FORCER (_BGP_DCR_TR_ARB + 0x0D) /* ARB: Forced route for manual mode */
+#define _TR_ARB_FORCER_CH20 _BN( 0) /* Channel 2 is a source for channel 0 */
+#define _TR_ARB_FORCER_CH10 _BN( 1) /* Channel 1 is a source for channel 0 */
+#define _TR_ARB_FORCER_CH00 _BN( 2) /* Channel 0 is a source for channel 0 */
+#define _TR_ARB_FORCER_CHP0 _BN( 3) /* Local client is a source for channel 0 */
+#define _TR_ARB_FORCER_CHA0 _BN( 4) /* ALU is a source for channel 0 */
+#define _TR_ARB_FORCER_VC0 _BN( 5) /* VC that is source for channel 0 */
+#define _TR_ARB_FORCER_CH21 _BN( 6) /* Channel 2 is a source for channel 1 */
+#define _TR_ARB_FORCER_CH11 _BN( 7) /* Channel 1 is a source for channel 1 */
+#define _TR_ARB_FORCER_CH01 _BN( 8) /* Channel 0 is a source for channel 1 */
+#define _TR_ARB_FORCER_CHP1 _BN( 9) /* Local client is a source for channel 1 */
+#define _TR_ARB_FORCER_CHA1 _BN(10) /* ALU is a source for channel 1 */
+#define _TR_ARB_FORCER_VC1 _BN(11) /* VC that is source for channel 1 */
+#define _TR_ARB_FORCER_CH22 _BN(12) /* Channel 2 is a source for channel 2 */
+#define _TR_ARB_FORCER_CH12 _BN(13) /* Channel 1 is a source for channel 2 */
+#define _TR_ARB_FORCER_CH02 _BN(14) /* Channel 0 is a source for channel 2 */
+#define _TR_ARB_FORCER_CHP2 _BN(15) /* Local client is a source for channel 2 */
+#define _TR_ARB_FORCER_CHA2 _BN(16) /* ALU is a source for channel 2 */
+#define _TR_ARB_FORCER_VC2 _BN(17) /* VC that is source for channel 2 */
+#define _TR_ARB_FORCER_CH2P _BN(18) /* Channel 2 is a source for local client */
+#define _TR_ARB_FORCER_CH1P _BN(19) /* Channel 1 is a source for local client */
+#define _TR_ARB_FORCER_CH0P _BN(20) /* Channel 0 is a source for local client */
+#define _TR_ARB_FORCER_CHPP _BN(21) /* Local client is a source for local client */
+#define _TR_ARB_FORCER_CHAP _BN(22) /* ALU is a source for local client */
+#define _TR_ARB_FORCER_VCP _BN(23) /* VC that is source for local client */
+#define _TR_ARB_FORCER_CH2A _BN(24) /* Channel 2 is a source for ALU */
+#define _TR_ARB_FORCER_CH1A _BN(25) /* Channel 1 is a source for ALU */
+#define _TR_ARB_FORCER_CH0A _BN(26) /* Channel 0 is a source for ALU */
+#define _TR_ARB_FORCER_CHPA _BN(27) /* Local client is a source for ALU */
+#define _TR_ARB_FORCER_CHAA _BN(28) /* ALU is a source for ALU */
+#define _TR_ARB_FORCER_VCA _BN(29) /* VC that is source for ALU */
+#define _BGP_DCR_TR_ARB_FORCEH (_BGP_DCR_TR_ARB + 0x0E) /* ARB: Forced header for manual mode */
+#define _BGP_DCR_TR_ARB_XSTAT (_BGP_DCR_TR_ARB + 0x0F) /* ARB: Extended router status */
+#define _TR_ARB_XSTAT_BLK20 _BN( 0) /* Request from channel 2, VC0 is blocked */
+#define _TR_ARB_XSTAT_BLK10 _BN( 1) /* Request from channel 1, VC0 is blocked */
+#define _TR_ARB_XSTAT_BLK00 _BN( 2) /* Request from channel 0, VC0 is blocked */
+#define _TR_ARB_XSTAT_BLKP0 _BN( 3) /* Request from local client, VC0 is blocked */
+#define _TR_ARB_XSTAT_BLK21 _BN( 4) /* Request from channel 2, VC1 is blocked */
+#define _TR_ARB_XSTAT_BLK11 _BN( 5) /* Request from channel 1, VC1 is blocked */
+#define _TR_ARB_XSTAT_BLK01 _BN( 6) /* Request from channel 0, VC1 is blocked */
+#define _TR_ARB_XSTAT_BLKP1 _BN( 7) /* Request from local client, VC1 is blocked */
+#define _TR_ARB_XSTAT_BSYR2 _BN( 8) /* Channel 2 receiver is busy */
+#define _TR_ARB_XSTAT_BSYR1 _BN( 9) /* Channel 1 receiver is busy */
+#define _TR_ARB_XSTAT_BSYR0 _BN(10) /* Channel 0 receiver is busy */
+#define _TR_ARB_XSTAT_BSYPI _BN(11) /* Local client injection is busy */
+#define _TR_ARB_XSTAT_BSYA _BN(12) /* ALU is busy */
+#define _TR_ARB_XSTAT_BSYS2 _BN(13) /* Channel 2 sender is busy */
+#define _TR_ARB_XSTAT_BSYS1 _BN(14) /* Channel 1 sender is busy */
+#define _TR_ARB_XSTAT_BSYS0 _BN(15) /* Channel 0 sender is busy */
+#define _TR_ARB_XSTAT_BSYPR _BN(16) /* Local client reception is busy */
+#define _TR_ARB_XSTAT_ARB_TO(x) _B15(31,x) /* Greedy-Arbitration timeout */
+
+/* Tree Channel 0 Control Registers */
+#define _BGP_DCR_TR_CH0_RSTAT (_BGP_DCR_TR_CH0 + 0x00) /* CH0: Receiver status */
+#define _TR_RSTAT_RCVERR _BN( 0) /* Receiver error */
+#define _TR_RSTAT_LHEXP _BN( 1) /* Expect link header */
+#define _TR_RSTAT_PH0EXP _BN( 2) /* Expect packet header 0 */
+#define _TR_RSTAT_PH1EXP _BN( 3) /* Expect packet header 1 */
+#define _TR_RSTAT_PDRCV _BN( 4) /* Receive packet data */
+#define _TR_RSTAT_CWEXP _BN( 5) /* Expect packet control word */
+#define _TR_RSTAT_CSEXP _BN( 6) /* Expect packet checksum */
+#define _TR_RSTAT_SCRBRD0 _B8(14,0xff) /* VC0 fifo scoreboard */
+#define _TR_RSTAT_SCRBRD1 _B8(22,0xff) /* VC1 fifo scoreboard */
+#define _TR_RSTAT_RMTSTAT _B9(31,0x1ff) /* Remote status */
+#define _BGP_DCR_TR_CH0_RCTRL (_BGP_DCR_TR_CH0 + 0x01) /* CH0: Receiver control */
+#define _TR_RCTRL_FERR _BN( 0) /* Force receiver into error state */
+#define _TR_RCTRL_RST _BN( 1) /* Reset all internal pointers */
+#define _TR_RCTRL_FRZ0 _BN( 2) /* Freeze VC0 */
+#define _TR_RCTRL_FRZ1 _BN( 3) /* Freeze VC1 */
+#define _TR_RCTRL_RCVALL _BN( 4) /* Disable receiver CRC check and accept all packets */
+#define _BGP_DCR_TR_CH0_SSTAT (_BGP_DCR_TR_CH0 + 0x02) /* CH0: Sender status */
+#define _TR_SSTAT_SYNC _BN( 0) /* Phase of sender */
+#define _TR_SSTAT_ARB _BN( 1) /* Arbitrating */
+#define _TR_SSTAT_PH0SND _BN( 2) /* Sending packet header 0 */
+#define _TR_SSTAT_PH1SND _BN( 3) /* Sending packet header 1 */
+#define _TR_SSTAT_PDSND _BN( 4) /* Sending packet payload */
+#define _TR_SSTAT_CWSND _BN( 5) /* Sending packet control word */
+#define _TR_SSTAT_CSSND _BN( 6) /* Sending packet checksum */
+#define _TR_SSTAT_IDLSND _BN( 7) /* Sending idle packet */
+#define _TR_SSTAT_RPTR0 _B3(10,0x7) /* VC0 read pointer */
+#define _TR_SSTAT_WPTR0 _B3(13,0x7) /* VC0 write pointer */
+#define _TR_SSTAT_RPTR1 _B3(16,0x7) /* VC1 read pointer */
+#define _TR_SSTAT_WPTR1 _B3(19,0x7) /* VC1 write pointer */
+#define _BGP_DCR_TR_CH0_SCTRL (_BGP_DCR_TR_CH0 + 0x03) /* CH0: Sender control */
+#define _TR_SCTRL_SYNC _BN( 0) /* Force sender to send SYNC */
+#define _TR_SCTRL_IDLE _BN( 1) /* Force sender to send IDLE */
+#define _TR_SCTRL_RST _BN( 2) /* Reset all internal pointers */
+#define _TR_SCTRL_INVMSB _BN( 3) /* Invert MSB of class for loopback packets */
+#define _TR_SCTRL_OFF _BN( 4) /* Disable (black hole) the sender */
+#define _BGP_DCR_TR_CH0_TNACK (_BGP_DCR_TR_CH0 + 0x04) /* CH0: Tolerated delay from NACK to ACK status */
+#define _BGP_DCR_TR_CH0_CNACK (_BGP_DCR_TR_CH0 + 0x05) /* CH0: Time since last NACK received */
+#define _BGP_DCR_TR_CH0_TIDLE (_BGP_DCR_TR_CH0 + 0x06) /* CH0: Frequency to send IDLE packets */
+#define _BGP_DCR_TR_CH0_CIDLE (_BGP_DCR_TR_CH0 + 0x07) /* CH0: Time since last IDLE sent */
+
+/* Tree Channel 1 Control Registers */
+/* Note: Register definitions are the same as those of channel 0. */
+#define _BGP_DCR_TR_CH1_RSTAT (_BGP_DCR_TR_CH1 + 0x00) /* CH1: Receiver status */
+#define _BGP_DCR_TR_CH1_RCTRL (_BGP_DCR_TR_CH1 + 0x01) /* CH1: Receiver control */
+#define _BGP_DCR_TR_CH1_SSTAT (_BGP_DCR_TR_CH1 + 0x02) /* CH1: Sender status */
+#define _BGP_DCR_TR_CH1_SCTRL (_BGP_DCR_TR_CH1 + 0x03) /* CH1: Sender control */
+#define _BGP_DCR_TR_CH1_TNACK (_BGP_DCR_TR_CH1 + 0x04) /* CH1: Tolerated delay from NACK to ACK status */
+#define _BGP_DCR_TR_CH1_CNACK (_BGP_DCR_TR_CH1 + 0x05) /* CH1: Time since last NACK received */
+#define _BGP_DCR_TR_CH1_TIDLE (_BGP_DCR_TR_CH1 + 0x06) /* CH1: Frequency to send IDLE packets */
+#define _BGP_DCR_TR_CH1_CIDLE (_BGP_DCR_TR_CH1 + 0x07) /* CH1: Time since last IDLE sent */
+
+/* Tree Channel 2 Control Registers */
+/* Note: Register definitions are the same as those of channel 0. */
+#define _BGP_DCR_TR_CH2_RSTAT (_BGP_DCR_TR_CH2 + 0x00) /* CH2: Receiver status */
+#define _BGP_DCR_TR_CH2_RCTRL (_BGP_DCR_TR_CH2 + 0x01) /* CH2: Receiver control */
+#define _BGP_DCR_TR_CH2_SSTAT (_BGP_DCR_TR_CH2 + 0x02) /* CH2: Sender status */
+#define _BGP_DCR_TR_CH2_SCTRL (_BGP_DCR_TR_CH2 + 0x03) /* CH2: Sender control */
+#define _BGP_DCR_TR_CH2_TNACK (_BGP_DCR_TR_CH2 + 0x04) /* CH2: Tolerated delay from NACK to ACK status */
+#define _BGP_DCR_TR_CH2_CNACK (_BGP_DCR_TR_CH2 + 0x05) /* CH2: Time since last NACK received */
+#define _BGP_DCR_TR_CH2_TIDLE (_BGP_DCR_TR_CH2 + 0x06) /* CH2: Frequency to send IDLE packets */
+#define _BGP_DCR_TR_CH2_CIDLE (_BGP_DCR_TR_CH2 + 0x07) /* CH2: Time since last IDLE sent */
+
+/* Tree Global Registers */
+#define _BGP_DCR_TR_GLOB_FPTR (_BGP_DCR_TR_GLOB + 0x00) /* GLOB: Fifo Pointer Register */
+#define _TR_GLOB_FPTR_IPY0(x) _B3( 3,x) /* VC0 injection payload FIFO packet write pointer */
+#define _TR_GLOB_FPTR_IHD0(x) _B3( 7,x) /* VC0 injection header FIFO packet write pointer */
+#define _TR_GLOB_FPTR_IPY1(x) _B3(11,x) /* VC1 injection payload FIFO packet write pointer */
+#define _TR_GLOB_FPTR_IHD1(x) _B3(15,x) /* VC1 injection header FIFO packet write pointer */
+#define _TR_GLOB_FPTR_RPY0(x) _B3(19,x) /* VC0 reception payload FIFO packet read pointer */
+#define _TR_GLOB_FPTR_RHD0(x) _B3(23,x) /* VC0 reception header FIFO packet read pointer */
+#define _TR_GLOB_FPTR_RPY1(x) _B3(27,x) /* VC1 reception payload FIFO packet read pointer */
+#define _TR_GLOB_FPTR_RHD1(x) _B3(31,x) /* VC1 reception header FIFO packet read pointer */
+#define _BGP_DCR_TR_GLOB_NADDR (_BGP_DCR_TR_GLOB + 0x01) /* GLOB: Node Address Register */
+#define _TR_GLOB_NADDR(x) _B24(31,x) /* Node address */
+#define _BGP_DCR_TR_GLOB_VCFG0 (_BGP_DCR_TR_GLOB + 0x02) /* GLOB: VC0 Configuration Register (use macros below) */
+#define _BGP_DCR_TR_GLOB_VCFG1 (_BGP_DCR_TR_GLOB + 0x03) /* GLOB: VC1 Configuration Register */
+#define _TR_GLOB_VCFG_RCVALL _BN( 0) /* Disable P2P reception filtering */
+#define _TR_GLOB_VCFG_CSUMX(x) _B8(15,x) /* Injection checksum mode 2 exclusion */
+#define _TR_GLOB_VCFG_RWM(x) _B3(23,x) /* Payload reception FIFO watermark */
+#define _TR_GLOB_VCFG_IWM(x) _B3(31,x) /* Payload injection FIFO watermark */
+
+/* Tree Processor Reception Registers */
+#define _BGP_DCR_TR_REC_PRXF (_BGP_DCR_TR_REC + 0x00) /* REC: Receive Exception Flag Register */
+#define _BGP_DCR_TR_REC_PRXEN (_BGP_DCR_TR_REC + 0x01) /* REC: Receive Exception Enable Register */
+#define _TR_REC_PRX_APAR0 _BN( 8) /* P0 address parity error */
+#define _TR_REC_PRX_APAR1 _BN( 9) /* P1 address parity error */
+#define _TR_REC_PRX_ALIGN0 _BN(10) /* P0 address alignment error */
+#define _TR_REC_PRX_ALIGN1 _BN(11) /* P1 address alignment error */
+#define _TR_REC_PRX_ADDR0 _BN(12) /* P0 bad (unrecognized) address error */
+#define _TR_REC_PRX_ADDR1 _BN(13) /* P1 bad (unrecognized) address error */
+#define _TR_REC_PRX_COLL _BN(14) /* FIFO read collision error */
+#define _TR_REC_PRX_UE _BN(15) /* Uncorrectable SRAM ECC error */
+#define _TR_REC_PRX_PFU0 _BN(26) /* VC0 payload FIFO under-run error */
+#define _TR_REC_PRX_PFU1 _BN(27) /* VC1 payload FIFO under-run error */
+#define _TR_REC_PRX_HFU0 _BN(28) /* VC0 header FIFO under-run error */
+#define _TR_REC_PRX_HFU1 _BN(29) /* VC1 header FIFO under-run error */
+#define _TR_REC_PRX_WM0 _BN(30) /* VC0 payload FIFO above watermark */
+#define _TR_REC_PRX_WM1 _BN(31) /* VC1 payload FIFO above watermark */
+#define _BGP_DCR_TR_REC_PRDA (_BGP_DCR_TR_REC + 0x02) /* REC: Receive Diagnostic Address Register */
+#define _TR_PRDA_VC(x) _B1(21,x) /* Select VC to access */
+#define _TR_PRDA_MAC(x) _B1(22,x) /* Select SRAM macro to access */
+#define _TR_PRDA_LINE(x) _B7(29,x) /* Select line in SRAM or RA */
+#define _TR_PRDA_TGT(x) _B2(31,x) /* Select target sub-line or RA */
+#define _TR_PRDA_TGT_LO 0x0 /* Least significant word of SRAM */
+#define _TR_PRDA_TGT_HI 0x1 /* Most significant word of SRAM */
+#define _TR_PRDA_TGT_ECC 0x2 /* ECC syndrome of SRAM */
+#define _TR_PRDA_TGT_HDR 0x3 /* Header fifo */
+#define _BGP_DCR_TR_REC_PRDD (_BGP_DCR_TR_REC + 0x03) /* REC: Receive Diagnostic Data Register */
+#define _TR_PRDD_ECC(x) _B8(31,x) /* ECC */
+#define _TR_PRDD_DATA(x) (x) /* Data */
+
+/* Tree Processor Injection Registers */
+#define _BGP_DCR_TR_INJ_PIXF (_BGP_DCR_TR_INJ + 0x00) /* INJ: Injection Exception Flag Register */
+#define _BGP_DCR_TR_INJ_PIXEN (_BGP_DCR_TR_INJ + 0x01) /* INJ: Injection Exception Enable Register */
+#define _TR_INJ_PIX_APAR0 _BN( 6) /* P0 address parity error */
+#define _TR_INJ_PIX_APAR1 _BN( 7) /* P1 address parity error */
+#define _TR_INJ_PIX_ALIGN0 _BN( 8) /* P0 address alignment error */
+#define _TR_INJ_PIX_ALIGN1 _BN( 9) /* P1 address alignment error */
+#define _TR_INJ_PIX_ADDR0 _BN(10) /* P0 bad (unrecognized) address error */
+#define _TR_INJ_PIX_ADDR1 _BN(11) /* P1 bad (unrecognized) address error */
+#define _TR_INJ_PIX_DPAR0 _BN(12) /* P0 data parity error */
+#define _TR_INJ_PIX_DPAR1 _BN(13) /* P1 data parity error */
+#define _TR_INJ_PIX_COLL _BN(14) /* FIFO write collision error */
+#define _TR_INJ_PIX_UE _BN(15) /* Uncorrectable SRAM ECC error */
+#define _TR_INJ_PIX_PFO0 _BN(25) /* VC0 payload FIFO overflow error */
+#define _TR_INJ_PIX_PFO1 _BN(26) /* VC1 payload FIFO overflow error */
+#define _TR_INJ_PIX_HFO0 _BN(27) /* VC0 header FIFO overflow error */
+#define _TR_INJ_PIX_HFO1 _BN(28) /* VC1 header FIFO overflow error */
+#define _TR_INJ_PIX_WM0 _BN(29) /* VC0 payload FIFO at or below watermark */
+#define _TR_INJ_PIX_WM1 _BN(30) /* VC1 payload FIFO at or below watermark */
+#define _TR_INJ_PIX_ENABLE _BN(31) /* Injection interface enable (if enabled in PIXEN) */
+#define _BGP_DCR_TR_INJ_PIDA (_BGP_DCR_TR_INJ + 0x02) /* INJ: Injection Diagnostic Address Register */
+/* Use _TR_PRDA_* defined above. */
+#define _BGP_DCR_TR_INJ_PIDD (_BGP_DCR_TR_INJ + 0x03) /* INJ: Injection Diagnostic Data Register */
+/* Use _TR_PRDD_* defined above. */
+#define _BGP_DCR_TR_INJ_CSPY0 (_BGP_DCR_TR_INJ + 0x04) /* INJ: VC0 payload checksum */
+#define _BGP_DCR_TR_INJ_CSHD0 (_BGP_DCR_TR_INJ + 0x05) /* INJ: VC0 header checksum */
+#define _BGP_DCR_TR_INJ_CSPY1 (_BGP_DCR_TR_INJ + 0x06) /* INJ: VC1 payload checksum */
+#define _BGP_DCR_TR_INJ_CSHD1 (_BGP_DCR_TR_INJ + 0x07) /* INJ: VC1 header checksum */
+
+
+/* Link CRC's for the receivers 0..2 (vc0,1) */
+#define _BGP_DCR_TR_LCRC_R00 (_BGP_DCR_TR_LCRC + 0)
+#define _BGP_DCR_TR_LCRC_R01 (_BGP_DCR_TR_LCRC + 1)
+#define _BGP_DCR_TR_LCRC_R10 (_BGP_DCR_TR_LCRC + 2)
+#define _BGP_DCR_TR_LCRC_R11 (_BGP_DCR_TR_LCRC + 3)
+#define _BGP_DCR_TR_LCRC_R20 (_BGP_DCR_TR_LCRC + 4)
+#define _BGP_DCR_TR_LCRC_R21 (_BGP_DCR_TR_LCRC + 5)
+
+/* Link CRC's for the senders 0..2 (vc0,1) */
+#define _BGP_DCR_TR_LCRC_S00 (_BGP_DCR_TR_LCRC + 8)
+#define _BGP_DCR_TR_LCRC_S01 (_BGP_DCR_TR_LCRC + 9)
+#define _BGP_DCR_TR_LCRC_S10 (_BGP_DCR_TR_LCRC + 10)
+#define _BGP_DCR_TR_LCRC_S11 (_BGP_DCR_TR_LCRC + 11)
+#define _BGP_DCR_TR_LCRC_S20 (_BGP_DCR_TR_LCRC + 12)
+#define _BGP_DCR_TR_LCRC_S21 (_BGP_DCR_TR_LCRC + 13)
+
+/* Internal error counters and thresholds */
+#define _BGP_DCR_TR_ERR_R0_CRC (_BGP_DCR_TR_ERR + 0x00) /* CH0: Receiver link CRC errors detected */
+#define _BGP_DCR_TR_ERR_R0_CE (_BGP_DCR_TR_ERR + 0x01) /* CH0: Receiver SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_S0_RETRY (_BGP_DCR_TR_ERR + 0x02) /* CH0: Sender link retransmissions */
+#define _BGP_DCR_TR_ERR_S0_CE (_BGP_DCR_TR_ERR + 0x03) /* CH0: Sender SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_R1_CRC (_BGP_DCR_TR_ERR + 0x04) /* CH1: Receiver link CRC errors detected */
+#define _BGP_DCR_TR_ERR_R1_CE (_BGP_DCR_TR_ERR + 0x05) /* CH1: Receiver SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_S1_RETRY (_BGP_DCR_TR_ERR + 0x06) /* CH1: Sender link retransmissions */
+#define _BGP_DCR_TR_ERR_S1_CE (_BGP_DCR_TR_ERR + 0x07) /* CH1: Sender SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_R2_CRC (_BGP_DCR_TR_ERR + 0x08) /* CH2: Receiver link CRC errors detected */
+#define _BGP_DCR_TR_ERR_R2_CE (_BGP_DCR_TR_ERR + 0x09) /* CH2: Receiver SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_S2_RETRY (_BGP_DCR_TR_ERR + 0x0A) /* CH2: Sender link retransmissions */
+#define _BGP_DCR_TR_ERR_S2_CE (_BGP_DCR_TR_ERR + 0x0B) /* CH2: Sender SRAM errors corrected */
+#define _BGP_DCR_TR_ERR_INJ_SE (_BGP_DCR_TR_ERR + 0x0C) /* INJ: SRAM errors (correctable and uncorrectable) */
+#define _BGP_DCR_TR_ERR_REC_SE (_BGP_DCR_TR_ERR + 0x0D) /* REC: SRAM errors (correctable and uncorrectable) */
+
+#define _BGP_DCR_TR_ERR_R0_CRC_T (_BGP_DCR_TR_ERR + 0x10) /* Interrupt thresholds for corresponding error */
+#define _BGP_DCR_TR_ERR_R0_CE_T (_BGP_DCR_TR_ERR + 0x11) /* counters. */
+#define _BGP_DCR_TR_ERR_S0_RETRY_T (_BGP_DCR_TR_ERR + 0x12)
+#define _BGP_DCR_TR_ERR_S0_CE_T (_BGP_DCR_TR_ERR + 0x13)
+#define _BGP_DCR_TR_ERR_R1_CRC_T (_BGP_DCR_TR_ERR + 0x14)
+#define _BGP_DCR_TR_ERR_R1_CE_T (_BGP_DCR_TR_ERR + 0x15)
+#define _BGP_DCR_TR_ERR_S1_RETRY_T (_BGP_DCR_TR_ERR + 0x16)
+#define _BGP_DCR_TR_ERR_S1_CE_T (_BGP_DCR_TR_ERR + 0x17)
+#define _BGP_DCR_TR_ERR_R2_CRC_T (_BGP_DCR_TR_ERR + 0x18)
+#define _BGP_DCR_TR_ERR_R2_CE_T (_BGP_DCR_TR_ERR + 0x19)
+#define _BGP_DCR_TR_ERR_S2_RETRY_T (_BGP_DCR_TR_ERR + 0x1A)
+#define _BGP_DCR_TR_ERR_S2_CE_T (_BGP_DCR_TR_ERR + 0x1B)
+#define _BGP_DCR_TR_ERR_INJ_SE_T (_BGP_DCR_TR_ERR + 0x1C)
+#define _BGP_DCR_TR_ERR_REC_SE_T (_BGP_DCR_TR_ERR + 0x1D)
+
+/* For _bgp_tree_configure_class */
+#define _BGP_COL_RDR_NUM (16) /* classes are 0..15 */
+
+/* The following interface allows for fine-grain control of the RDR register */
+/* contents. OR the bits below together bit-wise to create a route specification. */
+#define _BGP_COL_RDR_SRC0 (0x1000) /* Bit Number 3 (MSb is bit number 0) */
+#define _BGP_COL_RDR_SRC1 (0x2000) /* Bit Number 2 */
+#define _BGP_COL_RDR_SRC2 (0x4000) /* Bit Number 1 */
+#define _BGP_COL_RDR_SRCL (0x0002) /* Bit Number 14 */
+#define _BGP_COL_RDR_TGT0 (0x0100) /* Bit Number 7 */
+#define _BGP_COL_RDR_TGT1 (0x0200) /* Bit Number 6 */
+#define _BGP_COL_RDR_TGT2 (0x0400) /* Bit Number 5 */
+#define _BGP_COL_RDR_TGTL (0x0001) /* Bit Number 15 */
+
+/* OR of all valid Source and Target bits for SrcTgtEnable validation. */
+#define _BGP_COL_RDR_ACCEPT (0x7703)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+/**********************************************************************
+ *
+ * Torus
+ *
+ **********************************************************************/
+
+#define _BGP_DCR_DMA_NUM_VALID_ADDR 8 /* g range */
+#define _BGP_DCR_iDMA_NUM_TS_FIFO_WM 2 /* j range */
+#define _BGP_DCR_rDMA_NUM_TS_FIFO_WM 4 /* p range */
+#define _BGP_DCR_iDMA_NUM_FIFO_REGS 4 /* i range */
+#define _BGP_DCR_iDMA_NUM_FIFO_MAP_REGS 32 /* k range */
+
+
+/* use g for repeated 8X, i repeated 4x, j repeated 2X, k repeated 32x, p repeated 4x */
+
+/* ------------------- */
+/* ---- Controls ----- */
+/* ------------------- */
+
+#define _BGP_DCR_DMA_RESET (_BGP_DCR_DMA+0x00) /* All bits reset to 1. */
+#define _DMA_RESET_DCR _BN( 0) /* Reset the DMA's DCR unit */
+#define _DMA_RESET_PQUE _BN( 1) /* Reset the DMA's Processor Queue unit */
+#define _DMA_RESET_IMFU _BN( 2) /* Reset the DMA's Injection Memory Fifo/Counter Unit */
+#define _DMA_RESET_RMFU _BN( 3) /* Reset the DMA's Reception Memory Fifo/Counter Unit */
+#define _DMA_RESET_LF _BN( 4) /* Reset the DMA's Local Fifo */
+#define _DMA_RESET_ITIU _BN( 5) /* Reset the DMA's Injection Torus Interface Unit */
+#define _DMA_RESET_ICONU _BN( 6) /* Reset the DMA's Injection Transfer Control Unit */
+#define _DMA_RESET_IDAU _BN( 7) /* Reset the DMA's Injection Data Alignment Unit */
+#define _DMA_RESET_IMIU _BN( 8) /* Reset the DMA's Injection L3 Memory Interface Unit */
+#define _DMA_RESET_RTIU _BN( 9) /* Reset the DMA's Reception Torus Interface Unit */
+#define _DMA_RESET_RCONU _BN(10) /* Reset the DMA's Reception Transfer Control Unit */
+#define _DMA_RESET_RDAU _BN(11) /* Reset the DMA's Reception Data Alignment Unit */
+#define _DMA_RESET_RMIU _BN(12) /* Reset the DMA's Reception L3 Memory Interface Unit */
+#define _DMA_RESET_PF _BN(13) /* Reset the DMA's Torus Prefetch Unit */
+ /* 14-30 reserved. */
+#define _DMA_RESET_LNKCHK _BN(31) /* Reset the DMA's Torus Link Packet Capture Unit */
+
+#define _BGP_DCR_DMA_BASE_CONTROL (_BGP_DCR_DMA+0x01)
+#define _DMA_BASE_CONTROL_USE_DMA _BN( 0) /* Use DMA and *not* the Torus if 1, reset state is 0. */
+#define _DMA_BASE_CONTROL_STORE_HDR _BN( 1) /* Store DMA Headers in Reception Header Fifo (debugging) */
+#define _DMA_BASE_CONTROL_PF_DIS _BN( 2) /* Disable Torus Prefetch Unit (should be 0) */
+#define _DMA_BASE_CONTROL_L3BURST_EN _BN( 3) /* Enable L3 Burst when 1 (should be enabled, except for debugging) */
+#define _DMA_BASE_CONTROL_ITIU_EN _BN( 4) /* Enable Torus Injection Data Transfer Unit (never make this zero) */
+#define _DMA_BASE_CONTROL_RTIU_EN _BN( 5) /* Enable Torus Reception Data Transfer Unit */
+#define _DMA_BASE_CONTROL_IMFU_EN _BN( 6) /* Enable DMA Injection Fifo Unit Arbiter */
+#define _DMA_BASE_CONTROL_RMFU_EN _BN( 7) /* Enable DMA Reception fifo Unit Arbiter */
+#define _DMA_BASE_CONTROL_L3PF_DIS _BN( 8) /* Disable L3 Read Prefetch (should be 0) */
+ /* 9..27 reserved. */
+#define _DMA_BASE_CONTROL_REC_FIFO_FULL_STOP_RDMA _BN( 28) /* DD2 Only, ECO 777, RDMA stops when fifo is full */
+#define _DMA_BASE_CONTROL_REC_FIFO_CROSSTHRESH_NOTSTICKY _BN( 29) /* DD2 Only, ECO 777, Rec. Fifo Threshold crossed is not sticky */
+#define _DMA_BASE_CONTROL_INJ_FIFO_CROSSTHRESH_NOTSTICKY _BN( 30) /* DD2 Only, ECO 777, Inj. Fifo Threshold crossed is not sticky */
+ /* 31 - ECO 653, leave at 0 */
+#define _BGP_DCR_DMA_BASE_CONTROL_INIT ( _DMA_BASE_CONTROL_USE_DMA | \
+ _DMA_BASE_CONTROL_L3BURST_EN | \
+ _DMA_BASE_CONTROL_ITIU_EN | \
+ _DMA_BASE_CONTROL_RTIU_EN | \
+ _DMA_BASE_CONTROL_IMFU_EN | \
+ _DMA_BASE_CONTROL_RMFU_EN)
+
+/* g in the interval [0:7]: */
+/* 32bit 16Byte aligned Physical Addresses containing (0..3 of UA | 0..27 of PA). */
+#define _BGP_DCR_iDMA_MIN_VALID_ADDR(g) (_BGP_DCR_DMA+((2*(g))+0x02))
+#define _BGP_DCR_iDMA_MAX_VALID_ADDR(g) (_BGP_DCR_DMA+((2*(g))+0x03))
+
+#define _BGP_DCR_iDMA_INJ_RANGE_TLB (_BGP_DCR_DMA+0x12)
+#define _iDMA_INT_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4)) /* (oops typo) 'r' in {0..7} Bit 0 of each range is L3 Cache Inhibit */
+#define _iDMA_INT_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4)) /* (oops typo) 'r' in {0..7} Bit 1 of each range is L3 ScratchPad. */
+#define _iDMA_INJ_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4)) /* 'r' in {0..7} Bit 0 of each range is L3 Cache Inhibit */
+#define _iDMA_INJ_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4)) /* 'r' in {0..7} Bit 1 of each range is L3 ScratchPad. */
+ /* Bits 2,3 of each range are reserved. */
+
+#define _BGP_DCR_rDMA_REC_RANGE_TLB (_BGP_DCR_DMA+0x13)
+#define _rDMA_REC_RANGE_TLB_L3CIN(r) _BN( 0+((r)*4)) /* 'r' in {0..7} Bit 0 of each range is L3 Cache Inhibit */
+#define _rDMA_REC_RANGE_TLB_L3SCR(r) _BN( 1+((r)*4)) /* 'r' in {0..7} Bit 1 of each range is L3 ScratchPad. */
+
+/* g in the interval [0:7] */
+/* 32bit 16Byte aligned Physical Addresses containing (0..3 of UA | 0..27 of PA). */
+#define _BGP_DCR_rDMA_MIN_VALID_ADDR(g) (_BGP_DCR_DMA+((2*(g))+0x14))
+#define _BGP_DCR_rDMA_MAX_VALID_ADDR(g) (_BGP_DCR_DMA+((2*(g))+0x15))
+
+/* j in the interval [0:1] */
+#define _BGP_DCR_iDMA_TS_FIFO_WM(j) (_BGP_DCR_DMA+(0x24+(j)))
+#define _iDMA_TS_FIFO_WM_N0(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_iDMA_TORUS_FIFO_WM(0), should be set to decimal 20 */
+#define _iDMA_TS_FIFO_WM_N1(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_iDMA_TORUS_FIFO_WM(0), should be set to decimal 20 */
+#define _iDMA_TS_FIFO_WM_N2(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_iDMA_TORUS_FIFO_WM(0), should be set to decimal 20 */
+#define _iDMA_TS_FIFO_WM_P0(x) _B6(31,(x)) /* bit {26..31} of _BGP_DCR_iDMA_TORUS_FIFO_WM(0), should be set to decimal 20 */
+#define _iDMA_TS_FIFO_WM_N3(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_iDMA_TORUS_FIFO_WM(1), should be set to decimal 20 */
+#define _iDMA_TS_FIFO_WM_N4(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_iDMA_TORUS_FIFO_WM(1), should be set to decimal 20 */
+#define _iDMA_TS_FIFO_WM_N5(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_iDMA_TORUS_FIFO_WM(1), should be set to decimal 20 */
+#define _iDMA_TS_FIFO_WM_P1(x) _B6(31,(x)) /* bit {26..31} of _BGP_DCR_iDMA_TORUS_FIFO_WM(1), should be set to decimal 20 */
+
+#define _iDMA_TS_FIFO_WM0_INIT (_iDMA_TS_FIFO_WM_N0(20) | \
+ _iDMA_TS_FIFO_WM_N1(20) | \
+ _iDMA_TS_FIFO_WM_N2(20) | \
+ _iDMA_TS_FIFO_WM_P0(20))
+#define _iDMA_TS_FIFO_WM1_INIT (_iDMA_TS_FIFO_WM_N3(20) | \
+ _iDMA_TS_FIFO_WM_N4(20) | \
+ _iDMA_TS_FIFO_WM_N5(20) | \
+ _iDMA_TS_FIFO_WM_P1(20))
+
+#define _BGP_DCR_iDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY (_BGP_DCR_DMA+0x26)
+#define _iDMA_LOCAL_FIFO_WM(x) _B7(7,(x)) /* bit {1..7} of _BGP_DCR_iDMA_LOCAL_FIFO_WM_RPT_CNT, set to decimal 55, 0x37 */
+#define _iDMA_HP_INJ_FIFO_RPT_CNT(x) _B4(11,(x)) /* bit {8..11} dma repeat count for using torus high priority injection fifo */
+#define _iDMA_NP_INJ_FIFO_RPT_CNT(x) _B4(15,(x)) /* bit {12..15} dma repeat count for using torus normal priority injection fifo */
+#define _iDMA_INJ_DELAY(x) _B4(23,(x)) /* bit {20..23} dma delay this amount of clock_x2 cycles before injecting next packet */
+
+#define _iDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY_INIT (_iDMA_LOCAL_FIFO_WM(55) | \
+ _iDMA_HP_INJ_FIFO_RPT_CNT(0) | \
+ _iDMA_NP_INJ_FIFO_RPT_CNT(0) | \
+ _iDMA_INJ_DELAY(0))
+
+/* p in the interval [0:3] */
+#define _BGP_DCR_rDMA_TS_FIFO_WM(p) (_BGP_DCR_DMA+(0x27+(p)))
+#define _rDMA_TS_FIFO_WM_G0N0(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_rDMA_TORUS_FIFO_WM(0), must be 0 */
+#define _rDMA_TS_FIFO_WM_G0N1(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_rDMA_TORUS_FIFO_WM(0), must be 0 */
+#define _rDMA_TS_FIFO_WM_G0N2(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_rDMA_TORUS_FIFO_WM(0), must be 0 */
+#define _rDMA_TS_FIFO_WM_G0N3(x) _B6(31,(x)) /* bit {26..31} of _BGP_DCR_rDMA_TORUS_FIFO_WM(0), must be 0 */
+#define _rDMA_TS_FIFO_WM_G0N4(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_rDMA_TORUS_FIFO_WM(1), must be 0 */
+#define _rDMA_TS_FIFO_WM_G0N5(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_rDMA_TORUS_FIFO_WM(1), must be 0 */
+#define _rDMA_TS_FIFO_WM_G0P(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_rDMA_TORUS_FIFO_WM(1), must be 0 */
+#define _rDMA_TS_FIFO_WM_G1N0(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_rDMA_TORUS_FIFO_WM(2), must be 0 */
+#define _rDMA_TS_FIFO_WM_G1N1(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_rDMA_TORUS_FIFO_WM(2), must be 0 */
+#define _rDMA_TS_FIFO_WM_G1N2(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_rDMA_TORUS_FIFO_WM(2), must be 0 */
+#define _rDMA_TS_FIFO_WM_G1N3(x) _B6(31,(x)) /* bit {26..31} of _BGP_DCR_rDMA_TORUS_FIFO_WM(2), must be 0 */
+#define _rDMA_TS_FIFO_WM_G1N4(x) _B6(7,(x)) /* bit {2..7} of _BGP_DCR_rDMA_TORUS_FIFO_WM(3), must be 0 */
+#define _rDMA_TS_FIFO_WM_G1N5(x) _B6(15,(x)) /* bit {10..15} of _BGP_DCR_rDMA_TORUS_FIFO_WM(3), must be 0 */
+#define _rDMA_TS_FIFO_WM_G1P(x) _B6(23,(x)) /* bit {18..23} of _BGP_DCR_rDMA_TORUS_FIFO_WM(3), must be 0 */
+
+#define _rDMA_TS_FIFO_WM0_INIT (_rDMA_TS_FIFO_WM_G0N0(0) | \
+ _rDMA_TS_FIFO_WM_G0N1(0) | \
+ _rDMA_TS_FIFO_WM_G0N2(0) | \
+ _rDMA_TS_FIFO_WM_G0N3(0))
+#define _rDMA_TS_FIFO_WM1_INIT (_rDMA_TS_FIFO_WM_G0N4(0) | \
+ _rDMA_TS_FIFO_WM_G0N5(0) | \
+ _rDMA_TS_FIFO_WM_G0P(0))
+#define _rDMA_TS_FIFO_WM2_INIT (_rDMA_TS_FIFO_WM_G1N0(0) | \
+ _rDMA_TS_FIFO_WM_G1N1(0) | \
+ _rDMA_TS_FIFO_WM_G1N2(0) | \
+ _rDMA_TS_FIFO_WM_G1N3(0))
+#define _rDMA_TS_FIFO_WM3_INIT (_rDMA_TS_FIFO_WM_G1N4(0) | \
+ _rDMA_TS_FIFO_WM_G1N5(0) | \
+ _rDMA_TS_FIFO_WM_G1P(0))
+
+#define _BGP_DCR_rDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY (_BGP_DCR_DMA+0x2b)
+#define _rDMA_LOCAL_FIFO_WM(x) _B7(7,(x)) /* bit {1..7}, local fifo watermark, must be 0 */
+#define _rDMA_HP_REC_FIFO_RPT_CNT(x) _B4(11,(x)) /* bit {8..11}, dma repeat count for torus high priority reception fifos */
+#define _rDMA_NP_REC_FIFO_RPT_CNT(x) _B4(15,(x)) /* bit {12..15}, dma repeat count for torus normal priority reception fifos */
+#define _rDMA_DELAY(x) _B4(23,(x)) /* bit {20..23}, dma delay this amount of clock_x2 cycles between packets */
+
+#define _rDMA_LOCAL_FIFO_WM_RPT_CNT_DELAY_INIT (_rDMA_LOCAL_FIFO_WM(0) | \
+ _rDMA_HP_REC_FIFO_RPT_CNT(0) | \
+ _rDMA_NP_REC_FIFO_RPT_CNT(0) | \
+ _rDMA_DELAY(0))
+
+/* i in the interval [0:3] */
+#define _BGP_DCR_iDMA_FIFO_ENABLE(i) (_BGP_DCR_DMA+(0x2c+(i))) /* each bit, if '1', enables an injection fifo */
+#define _BGP_DCR_rDMA_FIFO_ENABLE (_BGP_DCR_DMA+0x30) /* each bit, if '1', enables a reception fifo */
+#define _BGP_DCR_rDMA_FIFO_ENABLE_HEADER (_BGP_DCR_DMA+0x31)
+#define _rDMA_FIFO_ENABLE_HEADER0 _BN(28)
+#define _rDMA_FIFO_ENABLE_HEADER1 _BN(29)
+#define _rDMA_FIFO_ENABLE_HEADER2 _BN(30)
+#define _rDMA_FIFO_ENABLE_HEADER3 _BN(31)
+
+/* i in the interval [0:3] */
+#define _BGP_DCR_iDMA_FIFO_PRIORITY(i) (_BGP_DCR_DMA+(0x32+(i)))
+#define _BGP_DCR_iDMA_FIFO_RGET_THRESHOLD (_BGP_DCR_DMA+0x36)
+#define _BGP_DCR_iDMA_SERVICE_QUANTA (_BGP_DCR_DMA+0x37)
+#define _iDMA_SERVICE_QUANTA_HP(x) _B16(15,(x))
+#define _iDMA_SERVICE_QUANTA_NP(x) _B16(31,(x))
+#define _iDMA_SERVICE_QUANTA_INIT (_iDMA_SERVICE_QUANTA_HP(0) | _iDMA_SERVICE_QUANTA_NP(0))
+
+#define _BGP_DCR_rDMA_FIFO_TYPE (_BGP_DCR_DMA+0x38)
+#define _BGP_DCR_rDMA_FIFO_TYPE_HEADER (_BGP_DCR_DMA+0x39)
+#define _rDMA_FIFO_TYPE_HEADER0 _BN(28)
+#define _rDMA_FIFO_TYPE_HEADER1 _BN(29)
+#define _rDMA_FIFO_TYPE_HEADER2 _BN(30)
+#define _rDMA_FIFO_TYPE_HEADER3 _BN(31)
+#define _BGP_DCR_rDMA_FIFO_THRESH0 (_BGP_DCR_DMA+0x3a)
+#define _BGP_DCR_rDMA_FIFO_THRESH1 (_BGP_DCR_DMA+0x3b)
+
+/* k in the interval [0:31] */
+#define _BGP_DCR_iDMA_TS_INJ_FIFO_MAP(k) (_BGP_DCR_DMA+(0x3c+(k))) /* 8 bits for every dma injection fifo */
+/* @ Dong, for MG, is the following line good? */
+/* j in the interval [0:3] */
+#define _iDMA_TS_INJ_FIFO_MAP_FIELD(j, x) _B8((7+(j)*8), (x))
+/* i in the interval [0:3] */
+#define _BGP_DCR_iDMA_LOCAL_COPY(i) (_BGP_DCR_DMA+(0x5c+(i))) /* one bit for every dma injection fifo */
+
+/* XY = X, Y */
+/* ZHL = Z, High Priority, Local Copy */
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_XY (_BGP_DCR_DMA+0x60) /* torus recv group 0, (pid0, pid1) = "00" */
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID00_ZHL (_BGP_DCR_DMA+0x61)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_XY (_BGP_DCR_DMA+0x62)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G0_PID01_ZHL (_BGP_DCR_DMA+0x63)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_XY (_BGP_DCR_DMA+0x64)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID10_ZHL (_BGP_DCR_DMA+0x65)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_XY (_BGP_DCR_DMA+0x66)
+#define _BGP_DCR_rDMA_TS_REC_FIFO_MAP_G1_PID11_ZHL (_BGP_DCR_DMA+0x67)
+#define _rDMA_TS_REC_FIFO_MAP_XP(x) _B8(7,(x))
+#define _rDMA_TS_REC_FIFO_MAP_XM(x) _B8(15,(x))
+#define _rDMA_TS_REC_FIFO_MAP_YP(x) _B8(23,(x))
+#define _rDMA_TS_REC_FIFO_MAP_YM(x) _B8(31,(x))
+#define _rDMA_TS_REC_FIFO_MAP_ZP(x) _B8(7,(x))
+#define _rDMA_TS_REC_FIFO_MAP_ZM(x) _B8(15,(x))
+#define _rDMA_TS_REC_FIFO_MAP_HIGH(x) _B8(23,(x))
+#define _rDMA_TS_REC_FIFO_MAP_LOCAL(x) _B8(31,(x))
+
+/* ii in the interval [0:3] group 0, group 1, ..., group 3 */
+#define _BGP_DCR_rDMA_FIFO_CLEAR_MASK(ii) (_BGP_DCR_DMA+(0x68+(ii)))
+#define _rDMA_FIFO_CLEAR_MASK0_INIT 0xFF000000
+#define _rDMA_FIFO_CLEAR_MASK1_INIT 0x00FF0000
+#define _rDMA_FIFO_CLEAR_MASK2_INIT 0x0000FF00
+#define _rDMA_FIFO_CLEAR_MASK3_INIT 0x000000FF
+#define _BGP_DCR_rDMA_FIFO_HEADER_CLEAR_MASK (_BGP_DCR_DMA+0x6c)
+#define _rDMA_FIFO_HEADER_CLEAR_MASK_INIT 0x08040201
+
+/* g in the interval [0:3] group 0, group 1, group2, and group 3 */
+#define _BGP_DCR_iDMA_FIFO_INT_ENABLE_GROUP(g) (_BGP_DCR_DMA+(0x6d+(g)))
+/* t in the interval [0:3] type 0, type 1, ..., type 3 */
+#define _BGP_DCR_rDMA_FIFO_INT_ENABLE_TYPE(t) (_BGP_DCR_DMA+(0x71+(t)))
+#define _BGP_DCR_rDMA_HEADER_FIFO_INT_ENABLE (_BGP_DCR_DMA+0x75)
+#define _rDMA_HEADER_HEADER_FIFO_INT_ENABLE_TYPE(t,x) _B4((7+(t)*8), (x))
+
+/* g in the interval [0:3] group 0, group 1, ..., group 3 */
+#define _BGP_DCR_iDMA_COUNTER_INT_ENABLE_GROUP(g) (_BGP_DCR_DMA+(0x76+(g)))
+
+/* g in the interval [0:3] group 0, group 1, ..., group 3 */
+#define _BGP_DCR_rDMA_COUNTER_INT_ENABLE_GROUP(g) (_BGP_DCR_DMA+(0x7a+(g)))
+
+/* ---------------------------- */
+/* ---- Fatal Error Enables ----- */
+/* ---------------------------- */
+/* e in the interval [0:3], bit definition in the fatal errors at 0x93 - 0x96 */
+#define _BGP_DCR_DMA_FATAL_ERROR_ENABLE(e) (_BGP_DCR_DMA +(0x7e +(e)))
+
+/* ------------------------------- */
+/* ---- Backdoor Access Regs ----- */
+/* ------------------------------- */
+#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_CTRL (_BGP_DCR_DMA+0x82)
+#define _DMA_LF_IMFU_DESC_BD_CTRL_ENABLE _BN(0) /* if '1', enable backdoor read/write */
+#define _DMA_LF_IMFU_DESC_BD_CTRL_NOECC _BN(1) /* if '1', do not do ECC on backdoor read/write */
+#define _DMA_LF_IMFU_DESC_BD_CTRL_RD_REQ _BN(2) /* if '1', do read */
+#define _DMA_LF_IMFU_DESC_BD_CTRL_WR_REQ _BN(3) /* if '1', do write */
+#define _DMA_LF_IMFU_DESC_BD_CTRL_IMFU_SEL _BN(4) /* unit select, '0' local fifo, '1' imfu descriptor */
+#define _DMA_LF_IMFU_DESC_BD_CTRL_LF_ADDR(x) _B7(15,(x)) /* 7 bit sram address for local fifo */
+#define _DMA_LF_IMFU_DESC_BD_CTRL_IMFU_ADDR(x) _B8(15,(x)) /* 8 bit sram address for imfu descriptor */
+#define _DMA_LF_IMFU_DESC_BD_CTRL_WR_ECC0(x) _B8(23,(x)) /* 8 bit write ECC for data bits 0 to 63 */
+#define _DMA_LF_IMFU_DESC_BD_CTRL_WR_ECC1(x) _B8(31,(x)) /* 8 bit write ECC for data bits 64 to 127 */
+/* i in the interval [0:3] */
+#define _BGP_DCR_DMA_LF_IMFU_DESC_BACKDOOR_WR_DATA(i) (_BGP_DCR_DMA+(0x83+(i))) /* 128 bit backdoor write data */
+#define _BGP_DCR_DMA_ARRAY_BD_CTRL (_BGP_DCR_DMA+0x87) /* fifo/counter array backdoor control */
+#define _DMA_ARRAY_BD_CTRL_ENABLE _BN(0)
+#define _DMA_ARRAY_BD_CTRL_RD_SEL_IMFU_FIFO _B2(2,0) /* unit select for backdoor read */
+#define _DMA_ARRAY_BD_CTRL_RD_SEL_IMFU_COUNTER _B2(2,1)
+#define _DMA_ARRAY_BD_CTRL_RD_SEL_RMFU_FIFO _B2(2,2)
+#define _DMA_ARRAY_BD_CTRL_RD_SEL_RMFU_COUNTER _B2(2,3)
+#define _DMA_ARRAY_BD_CTRL_WR_ECC(x) _B7(15,(x))
+
+/* ------------------------------------- */
+/* ---- Torus Link Checker Control ----- */
+/* ------------------------------------- */
+#define _BGP_DCR_DMA_TS_LINK_CHK_CTRL (_BGP_DCR_DMA+0x88)
+#define _DMA_TS_LINK_CHK_CTRL_SEL(x) _B3(2,(x)) /* 0 - xp, 1 - xm, 2 - yp, 3 - ym, 4 - zp, 5 - zm, 6, 7 disable */
+#define _DMA_TS_LINK_CHK_CTRL_RW_ENABLE _BN(8) /* if 1, enable read/write to link checker internal sram */
+#define _DMA_TS_LINK_CHK_CTRL_WR_REQ _BN(12)
+#define _DMA_TS_LINK_CHK_CTRL_RD_REQ _BN(13)
+#define _DMA_TS_LINK_CHK_CTRL_ADDR(x) _B10(23,(x))
+#define _DMA_TS_LINK_CHK_CTRL_WR_DATA(x) _B8(31,(x))
+#define _DMA_TS_LINK_CHK_BAD_OFFSET (0) /* sram address where bad packet starts */
+#define _DMA_TS_LINK_CHK_GOOD_OFFSET (320) /* sram address where good packet starts */
+
+
+/* -------------------- */
+/* ---- Threshold ----- */
+/* -------------------- */
+#define _BGP_DCR_DMA_CE_COUNT_THRESHOLD (_BGP_DCR_DMA+0x89) /* correctable ecc error count threshold, reset to 0xFFFFFFFF */
+/* default used when system comes out of reset, will have to be tuned */
+#define _BGP_DCR_DMA_CE_COUNT_THRESHOLD_INIT 1
+
+/* ---------------------------------- */
+/* ---- Correctable error count ----- */
+/* ---------------------------------- */
+/* c in the interval [0:8] count 0, count 1, ..., count 8 */
+#define _BGP_DCR_DMA_CE_COUNT(c) (_BGP_DCR_DMA+(0x8A+(c)))
+#define _BGP_DCR_DMA_CE_COUNT_INJ_FIFO0 (_BGP_DCR_DMA+0x8A)
+#define _BGP_DCR_DMA_CE_COUNT_INJ_FIFO1 (_BGP_DCR_DMA+0x8B)
+#define _BGP_DCR_DMA_CE_COUNT_INJ_COUNTER (_BGP_DCR_DMA+0x8C)
+#define _BGP_DCR_DMA_CE_COUNT_INJ_DESC (_BGP_DCR_DMA+0x8D)
+#define _BGP_DCR_DMA_CE_COUNT_REC_FIFO0 (_BGP_DCR_DMA+0x8E)
+#define _BGP_DCR_DMA_CE_COUNT_REC_FIFO1 (_BGP_DCR_DMA+0x8F)
+#define _BGP_DCR_DMA_CE_COUNT_REC_COUNTER (_BGP_DCR_DMA+0x90)
+#define _BGP_DCR_DMA_CE_COUNT_LOCAL_FIFO0 (_BGP_DCR_DMA+0x91)
+#define _BGP_DCR_DMA_CE_COUNT_LOCAL_FIFO1 (_BGP_DCR_DMA+0x92)
+
+/* upon termination, create RAS event if any of the above counts are greater than this value */
+#define _BGP_DCR_DMA_CE_TERM_THRESH 0
+
+/* ----------------- */
+/* ---- Status ----- */
+/* ----------------- */
+/* e in the interval [0:3] error0, error1, ..., error 3 */
+#define _BGP_DCR_DMA_FATAL_ERROR(e) (_BGP_DCR_DMA+(0x93+(e)))
+
+/* Below are error conditions most likely caused by software */
+#define _BGP_DCR_DMA_FATAL_ERROR0_WR0_MSB _BN(4) /* pque wr0 msb not 0 */
+#define _BGP_DCR_DMA_FATAL_ERROR0_RD0_MSB _BN(8) /* pque rd0 msb not 0 */
+#define _BGP_DCR_DMA_FATAL_ERROR0_WR1_MSB _BN(12) /* pque wr1 msb not 0 */
+#define _BGP_DCR_DMA_FATAL_ERROR0_RD1_MSB _BN(16) /* pque rd1 msb not 0 */
+
+#define _BGP_DCR_DMA_FATAL_ERROR1_REC_MAP _BN(22) /* multiple bits set for the dcr rec fifo map */
+
+
+#define _BGP_DCR_DMA_FATAL_ERROR2_FIFO_SEL _BN(14) /* fifo_sel_n error */
+#define _BGP_DCR_DMA_FATAL_ERROR2_FIFO_SEL_FORM _BN(15) /* fifo_sel_n_form error */
+#define _BGP_DCR_DMA_FATAL_ERROR2_READ_RANGE _BN(25) /* read from address not in one of dcr address ranges */
+
+#define _BGP_DCR_DMA_FATAL_ERROR3_DPUT_SIZE _BN(8) /* direct put packet had greater than 240 bytes */
+#define _BGP_DCR_DMA_FATAL_ERROR3_RGET_SIZE _BN(9) /* remote get packet had greater than 240 bytes */
+#define _BGP_DCR_DMA_FATAL_ERROR3_MAX_ADDRESS _BN(18) /* write to address larger than counter max */
+#define _BGP_DCR_DMA_FATAL_ERROR3_WRITE_RANGE _BN(26) /* write to address not in one of dcr address ranges */
+
+#define _BGP_DCR_DMA_PQUE_WR0_BAD_ADDR (_BGP_DCR_DMA+0x97)
+#define _BGP_DCR_DMA_PQUE_RD0_BAD_ADDR (_BGP_DCR_DMA+0x98)
+#define _BGP_DCR_DMA_PQUE_WR1_BAD_ADDR (_BGP_DCR_DMA+0x99)
+#define _BGP_DCR_DMA_PQUE_RD1_BAD_ADDR (_BGP_DCR_DMA+0x9a)
+
+#define _BGP_DCR_DMA_MFU_STAT0 (_BGP_DCR_DMA+0x9b)
+#define _DMA_MFU_STAT0_IMFU_NOT_ENABLED_COUNTER_ID(x) _G8((x), 7) /* idma not enabled counter id */
+#define _DMA_MFU_STAT0_IMFU_UNDERFLOW_COUNTER_ID(x) _G8((x), 15) /* idma underflow counter id */
+#define _DMA_MFU_STAT0_IMFU_OVERFLOW_NB_ADDR(x) _G16((x), 31) /* idma netbus addr that caused counter overflow */
+#define _BGP_DCR_DMA_MFU_STAT1 (_BGP_DCR_DMA+0x9c)
+#define _DMA_MFU_STAT1_IMFU_CUR_FIFO_ID(x) _G7((x), 7) /* current fifo id that idma is working on */
+#define _DMA_MFU_STAT1_RMFU_UNDERFLOW_COUNTER_ID(x) _G8((x), 15) /* rdma underflow counter id */
+#define _DMA_MFU_STAT1_RMFU_OVERFLOW_NB_ADDR(x) _G16((x), 31) /* rdma netbus addr that caused counter overflow */
+#define _BGP_DCR_DMA_MFU_STAT2 (_BGP_DCR_DMA+0x9d)
+#define _DMA_MFU_STAT2_RMFU_FIFO_NE_OR_NA(x) _GN((x), 0) /* rdma fifo not enabled or not all_available */
+#define _DMA_MFU_STAT2_RMFU_HDR_FIFO_NE_OR_NA(x) _GN((x), 1) /* rdma header fifo not enabled or not all_available */
+#define _DMA_MFU_STAT2_RMFU_INJ_FIFO_NE_OR_NA(x) _GN((x), 2) /* rdma injection fifo for remote get not enabled or not all_available */
+#define _DMA_MFU_STAT2_RMFU_COUNTER_NE(x) _GN((x), 3) /* rdma accessing not enabled counter */
+#define _DMA_MFU_STAT2_RMFU_PKT_PID(x) _G2((x), 7) /* rdma receiving packet pid */
+#define _DMA_MFU_STAT2_RMFU_FIFO_BIT(x) _G8((x), 15) /* rdma receiving packet fifo bit, only one bit should be set */
+ /* bit orders are xp, xm, yp, ym, zp, zm, hp, local */
+#define _DMA_MFU_STAT2_RMFU_RGET_FIFO_ID(x) _G8((x), 23) /* rdma remote get (injection) fifo id */
+#define _DMA_MFU_STAT2_RMFU_COUNTER_ID(x) _G8((x), 31) /* rdma direct put counter id */
+#define _BGP_DCR_DMA_L3_RD_ERROR_ADDR (_BGP_DCR_DMA+0x9e)
+#define _BGP_DCR_DMA_L3_WR_ERROR_ADDR (_BGP_DCR_DMA+0x9f)
+
+/* i in the interval [0:3] */
+#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_RD_DATA(i) (_BGP_DCR_DMA+(0xa0+(i)))
+#define _BGP_DCR_DMA_LF_IMFU_DESC_BD_RD_ECC (_BGP_DCR_DMA+0xa4)
+#define _DMA_LF_IMFU_DESC_BD_RD_ECC_DWORD0(x) _G8((x),23) /* ecc for data bits 0 to 63 */
+#define _DMA_LF_IMFU_DESC_BD_RD_ECC_DWORD1(x) _G8((x),31) /* ecc for data bits 64 to 127 */
+#define _BGP_DCR_DMA_ARRAY_RD_ECC (_BGP_DCR_DMA+0xa5)
+#define _DMA_ARRAY_RD_ECC_WORD0(x) _G7((x), 7) /* word address offset 0 */
+#define _DMA_ARRAY_RD_ECC_WORD1(x) _G7((x), 15) /* word address offset 1 */
+#define _DMA_ARRAY_RD_ECC_WORD2(x) _G7((x), 23) /* word address offset 2 */
+#define _DMA_ARRAY_RD_ECC_WORD3(x) _G7((x), 31) /* word address offset 3 */
+#define _BGP_DCR_DMA_TS_LINK_CHK_STAT (_BGP_DCR_DMA+0xa6)
+#define _DMA_TS_LINK_CHK_STAT_PKT_CAPTURED(x) _GN((x), 0) /* bad packet captured flag */
+#define _DMA_TS_LINK_CHK_STAT_RECV_PIPE_FERR(x) _GN((x), 1) /* receive pipe fatal error */
+#define _DMA_TS_LINK_CHK_STAT_STATE(x) _G4((x), 7) /* state machine state */
+#define _DMA_TS_LINK_CHK_STAT_SRAM_ADDR(x) _G10((x), 23) /* current sram read or write address */
+#define _DMA_TS_LINK_CHK_STAT_SRAM_RD_DATA(x) _G8((x), 31) /* sram read data */
+
+/* ---- Debug ----- */
+/* i in the interval [0:3] */
+#define _BGP_DCR_DMA_iFIFO_DESC_RD_FLAG(i) (_BGP_DCR_DMA+(0xa7+(i)))
+/* j in the interval [0:1] */
+#define _BGP_DCR_DMA_INTERNAL_STATE(j) (_BGP_DCR_DMA+(0xab+(j)))
+#define _DMA_INTERNAL_STATE0_IMFU_SEL_STATE(x) _G3((x), 2)
+#define _DMA_INTERNAL_STATE0_IMFU_ARB_STATE(x) _G5((x), 7)
+#define _DMA_INTERNAL_STATE0_IMFU_FIFO_ARB_STATE(x) _G5((x), 12)
+#define _DMA_INTERNAL_STATE0_IMFU_CNT_ARB_STATE(x) _G4((x), 16)
+#define _DMA_INTERNAL_STATE0_RMFU_ARB_STATE(x) _G5((x), 23)
+#define _DMA_INTERNAL_STATE0_RMFU_FIFO_ARB_STATE(x) _G4((x), 27)
+#define _DMA_INTERNAL_STATE0_RMFU_CNT_ARB_STATE(x) _G4((x), 31)
+
+#define _DMA_INTERNAL_STATE1_PQUE_ARB_STATE(x) _G3((x), 2)
+#define _DMA_INTERNAL_STATE1_ICONU_SM_STATE(x) _G4((x), 6)
+#define _DMA_INTERNAL_STATE1_IFSU_SM_STATE(x) _G3((x), 9)
+#define _DMA_INTERNAL_STATE1_IDAU_L3RSM_STATE(x) _G3((x), 12)
+#define _DMA_INTERNAL_STATE1_IDAU_L3VSM_STATE(x) _G3((x), 15)
+#define _DMA_INTERNAL_STATE1_IDAU_TTSM_STATE(x) _G3((x), 18)
+#define _DMA_INTERNAL_STATE1_RCONU_SM_STATE(x) _G4((x), 22)
+#define _DMA_INTERNAL_STATE1_RFSU_SM_STATE(x) _G3((x), 25)
+#define _DMA_INTERNAL_STATE1_RDAU_QRSM_STATE(x) _G3((x), 28)
+#define _DMA_INTERNAL_STATE1_RDAU_L3SM_STATE(x) _G3((x), 31)
+
+/* values for _BGP_DCR_DMA_INTERNAL_STATE when all state machines are in idle, or wait state */
+#define _BGP_DCR_DMA_INTERNAL_STATE_0_IDLE (0x21088111)
+
+/* NOTE: the next line duplicates _BGP_DCR_DMA_INTERNAL_STATE_0_IDLE defined above (identical redefinition, benign) */
+#define _BGP_DCR_DMA_INTERNAL_STATE_0_IDLE (0x21088111)
+#define _BGP_DCR_DMA_INTERNAL_STATE_1_IDLE (0x22492249)
+
+#define _BGP_DCR_DMA_PQUE_POINTER (_BGP_DCR_DMA+0xad)
+#define _DMA_PQUE_POINTER_WR0_BEGIN(x) _G4((x),3)
+#define _DMA_PQUE_POINTER_WR0_END(x) _G4((x),7)
+#define _DMA_PQUE_POINTER_RD0_BEGIN(x) _G4((x),11)
+#define _DMA_PQUE_POINTER_RD0_END(x) _G4((x),15)
+#define _DMA_PQUE_POINTER_WR1_BEGIN(x) _G4((x),19)
+#define _DMA_PQUE_POINTER_WR1_END(x) _G4((x),23)
+#define _DMA_PQUE_POINTER_RD1_BEGIN(x) _G4((x),27)
+#define _DMA_PQUE_POINTER_RD1_END(x) _G4((x),31)
+#define _BGP_DCR_DMA_LOCAL_FIFO_POINTER (_BGP_DCR_DMA+0xae)
+#define _DMA_LOCAL_FIFO_POINTER_BEGIN(x) _G8((x),7)
+#define _DMA_LOCAL_FIFO_POINTER_END(x) _G8((x),15)
+#define _DMA_LOCAL_FIFO_POINTER_END_OF_PKT(x) _G8((x),23)
+#define _BGP_DCR_DMA_WARN_ERROR (_BGP_DCR_DMA+0xaf)
+
+/* offsets 0xb0 are reserved */
+
+/* ---- Clears ----- */
+#define _BGP_DCR_DMA_CLEAR0 (_BGP_DCR_DMA+0xb1)
+#define _DMA_CLEAR0_IMFU_ARB_WERR _BN(0)
+#define _DMA_CLEAR0_IMFU_COUNTER_UNDERFLOW _BN(1)
+#define _DMA_CLEAR0_IMFU_COUNTER_OVERFLOW _BN(2)
+#define _DMA_CLEAR0_RMFU_COUNTER_UNDERFLOW _BN(3)
+#define _DMA_CLEAR0_RMFU_COUNTER_OVERFLOW _BN(4)
+#define _DMA_CLEAR0_RMFU_ARB_WERR _BN(5)
+#define _DMA_CLEAR0_PQUE_WR0_BEN_WERR _BN(6)
+#define _DMA_CLEAR0_PQUE_WR0_ADDR_CHK_WERR _BN(7)
+#define _DMA_CLEAR0_PQUE_RD0_ADDR_CHK_WERR _BN(8)
+#define _DMA_CLEAR0_PQUE_WR1_BEN_WERR _BN(9)
+#define _DMA_CLEAR0_PQUE_WR1_ADDR_CHK_WERR _BN(10)
+#define _DMA_CLEAR0_PQUE_RD1_ADDR_CHK_WERR _BN(11)
+#define _DMA_CLEAR0_PQUE_WR0_HOLD_BAD_ADDR _BN(12)
+#define _DMA_CLEAR0_PQUE_RD0_HOLD_BAD_ADDR _BN(13)
+#define _DMA_CLEAR0_PQUE_WR1_HOLD_BAD_ADDR _BN(14)
+#define _DMA_CLEAR0_PQUE_RD1_HOLD_BAD_ADDR _BN(15)
+#define _DMA_CLEAR0_IFIFO_ARRAY_UE0 _BN(16)
+#define _DMA_CLEAR0_IFIFO_ARRAY_UE1 _BN(17)
+#define _DMA_CLEAR0_ICOUNTER_ARRAY_UE _BN(18)
+#define _DMA_CLEAR0_IMFU_DESC_UE _BN(19)
+#define _DMA_CLEAR0_RFIFO_ARRAY_UE0 _BN(20)
+#define _DMA_CLEAR0_RFIFO_ARRAY_UE1 _BN(21)
+#define _DMA_CLEAR0_RCOUNTER_ARRAY_UE _BN(22)
+#define _DMA_CLEAR0_LOCAL_FIFO_UE0 _BN(23)
+#define _DMA_CLEAR0_LOCAL_FIFO_UE1 _BN(24)
+
+#define _BGP_DCR_DMA_CLEAR1 (_BGP_DCR_DMA+0xb2)
+#define _DMA_CLEAR1_TS_LINK_CHK _BN(0)
+
+
+#endif
diff --git a/drivers/net/bgp_collective/ppc450.h b/drivers/net/bgp_collective/ppc450.h
new file mode 100644
index 00000000000000..0f312cb39671e3
--- /dev/null
+++ b/drivers/net/bgp_collective/ppc450.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2007, 2008 International Business Machines
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+#ifndef __DRIVERS__BLUEGENE__PPC450_H__
+#define __DRIVERS__BLUEGENE__PPC450_H__
+
+/* include asm instruction macros */
+/* #include <asm/ppc450.h> */
+
+/**********************************************************************
+ * DCR access wrapper
+ **********************************************************************/
+
+/* Read a run-time-selected Device Control Register via the 44x/450
+ * "mfdcrx" instruction (DCR number in a GPR, unlike mfdcr's immediate).
+ * NOTE(review): 'extern inline' relies on gnu89 inline semantics;
+ * confirm no out-of-line call is ever emitted, else prefer 'static inline'. */
+extern inline uint32_t mfdcrx(uint32_t dcrn)
+{
+	uint32_t value;
+	/* "memory" clobber keeps the DCR access ordered vs. memory ops. */
+	asm volatile ("mfdcrx %0,%1": "=r" (value) : "r" (dcrn) : "memory");
+	return value;
+}
+
+/* Write 'value' to a run-time-selected Device Control Register.  The
+ * "memory" clobber prevents the compiler from reordering memory
+ * accesses around the DCR write. */
+extern inline void mtdcrx(uint32_t dcrn, uint32_t value)
+{
+	asm volatile("mtdcrx %0,%1": :"r" (dcrn), "r" (value) : "memory");
+}
+
+/* Volatile 32-bit read with no barrier ("nosync"): forces an actual
+ * load from *vaddr but — per the commented-out mbar below — issues
+ * no synchronizing instruction. */
+extern inline uint32_t in_be32_nosync(uint32_t *vaddr)
+{
+	volatile uint32_t *va = (volatile uint32_t *) vaddr;
+	/* _bgp_mbar(); */
+	return *va;
+}
+
+
+/**********************************************************************
+ * Helper functions to access IO via double hummer
+ **********************************************************************/
+
+/* Copy 16 bytes in a single shot via the double-hummer FPU quad
+ * load/store pair (lfpdx/stfpdx); clobbers fr0.  Assumes both 'src'
+ * and 'dst' are 16-byte aligned — TODO confirm with callers. */
+extern inline void fpu_memcpy_16(void *dst, void *src)
+{
+	asm volatile("lfpdx 0,0,%0\n"
+		     "stfpdx 0,0,%1\n"
+		     :
+		     : "b"(src), "b"(dst)
+		     : "fr0", "memory");
+}
+
+/* 16-byte store to 'port': a misaligned source is bounced through an
+ * aligned stack buffer so the FPU quad store sees aligned data.
+ * 'port' itself is presumably 16-byte aligned — verify at callers. */
+extern inline void out_be128(void *port, void *ptrval)
+{
+	u32 tmp[4] __attribute__((aligned(16)));
+
+	if ((u32)ptrval & 0xf) {
+		memcpy(tmp, ptrval, 16);
+		ptrval = tmp;
+	}
+
+	fpu_memcpy_16(port, ptrval);
+}
+
+/* Out-string operation: write 'num' 16-byte blocks from 'src' to the
+ * fixed, 16-byte-aligned 'port', staging through an aligned buffer
+ * when 'src' is misaligned. */
+extern inline void outs_be128(void *port, void *src, unsigned num)
+{
+	u32 tmp[4] __attribute__((aligned(16)));
+
+	/* port must be 16 byte aligned */
+	BUG_ON((u32)port & 0xf);
+
+	if (unlikely((u32)src & 0xf)) {
+		/* unaligned source: bounce each block through 'tmp' */
+		while(num--) {
+			memcpy(tmp, src, 16);
+			fpu_memcpy_16(port, tmp);
+			src += 16;
+		}
+	} else {
+		while(num--) {
+			fpu_memcpy_16(port, src);
+			src += 16;
+		}
+	}
+}
+
+/* Write 'num' 16-byte blocks of zeros to the 16-byte-aligned 'port'. */
+extern inline void outs_zero128(void *port, unsigned num)
+{
+	static u32 zero[4] __attribute__((aligned(16))) = {0, };
+	BUG_ON((u32)port & 0xf);
+
+	while (num--)
+		out_be128(port, zero);
+}
+
+/*
+ * In-string operation similar to x86: reads 'num' 16-byte blocks from
+ * the fixed, 16-byte-aligned 'port' into memory at 'dest', staging
+ * through an aligned bounce buffer when 'dest' is misaligned.
+ */
+extern inline void ins_be128(void *dest, void *port, unsigned num)
+{
+	u32 tmp[4] __attribute__((aligned(16)));
+
+	/* port must be 16 byte aligned */
+	BUG_ON((u32)port & 0xf);
+
+	if ((u32)dest & 0xf)
+	{
+		/* unaligned destination */
+		while(num--) {
+			fpu_memcpy_16(tmp, port);
+			memcpy(dest, tmp, 16);
+			dest += 16;
+		}
+	}
+	else
+	{
+		while(num--) {
+			fpu_memcpy_16(dest, port);
+			dest += 16;
+		}
+	}
+}
+
+/* Single 16-byte read from 'port' into 'dest'; a misaligned 'dest'
+ * is serviced via the aligned bounce buffer plus memcpy. */
+extern inline void in_be128(void *dest, void *port)
+{
+	char tmp[16] __attribute__((aligned(16)));
+	void *ptr = dest;
+
+	if ((u32)dest & 0xf)
+		ptr = tmp;
+
+	fpu_memcpy_16(ptr, port);
+
+	if ((u32)dest & 0xf)
+		memcpy(dest, tmp, 16);
+}
+
+#endif /* !__DRIVERS__BLUEGENE__PPC450_H__ */
diff --git a/drivers/net/bgp_e10000/Makefile b/drivers/net/bgp_e10000/Makefile
new file mode 100644
index 00000000000000..c33c97ea491e2a
--- /dev/null
+++ b/drivers/net/bgp_e10000/Makefile
@@ -0,0 +1,5 @@
+# Makefile for BlueGene/P 10 GbE driver
+
+obj-$(CONFIG_BGP_E10000) += bgp_e10000.o
+
+bgp_e10000-objs := bgp_tomal.o bgp_emac.o bgp_e10000_main.o
diff --git a/drivers/net/bgp_e10000/bgp_e10000.h b/drivers/net/bgp_e10000/bgp_e10000.h
new file mode 100644
index 00000000000000..204217e3de0305
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_e10000.h
@@ -0,0 +1,175 @@
+/*
+ * bgp_e10000.h: common header file for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include <linux/proc_fs.h>
+#include <asm/io.h>
+
+#ifndef _BGP_E10000_H
+#define _BGP_E10000_H
+
+#define DBG_LEVEL1 1
+#define DBG_LEVEL2 (DBG_LEVEL1 | 2)
+#define DBG_LEVEL3 (DBG_LEVEL2 | 4)
+#define DBG_E10000 8
+#define DBG_EMAC 16
+#define DBG_TOMAL 32
+#define DBG_XSGS 64
+#define DBG_DEVBUS 128
+#define DBG_NAPI 256
+#define DBG_SCATTERGATHER 512
+
+#define BGP_E10000_MIN_MTU 256
+#define BGP_E10000_MAX_MTU 9000
+#define BGP_E10000_FCS_SIZE 4
+
+
+#ifdef CONFIG_BGP_E10000_DBG
+#include <asm/udbg.h>
+#define PRINTK(detail, format, args...) if (((detail) & CONFIG_BGP_E10000_DBG_LEVEL) == (detail)) udbg_printf("%s: " format, __FUNCTION__, ##args)
+#else
+#define PRINTK(detail, format, args...)
+#endif
+
+typedef unsigned char U8;
+typedef unsigned short U16;
+typedef unsigned int U32;
+typedef unsigned long long U64;
+
+
+typedef enum {
+ e10000_ras_none = 0x00,
+ e10000_ras_hw_not_found = 0x01,
+ e10000_ras_netdev_alloc_failure = 0x02,
+ e10000_ras_netdev_reg_failure = 0x03,
+ e10000_ras_mtu_invalid = 0x04,
+ e10000_ras_tx_timeout = 0x05,
+ e10000_ras_internal_error = 0x07,
+ e10000_ras_hw_failure = 0x09,
+ e10000_ras_link_error = 0x0a,
+ e10000_ras_phy_reset_error = 0x0b,
+ e10000_ras_emac_config_error = 0x0c,
+ e10000_ras_link_loss = 0x0d,
+
+ e10000_ras_max = 0xff
+} e10000_ras_id;
+
+
+typedef struct _E10000_PROC_ENTRY {
+ char* name;
+ void* addr;
+ struct proc_dir_entry* entry;
+} E10000_PROC_ENTRY;
+
+
+
+/* Generates a RAS event for ethernet. */
+void e10000_printr(U16 subComponent,
+ U16 id,
+ char* format,
+ ...);
+
+
+/* Read a run-time-selected DCR (register-indirect mfdcrx form). */
+static inline U32 mfdcrx(U32 dcrNum)
+{
+    U32 dcrVal = 0;
+
+    asm volatile("mfdcrx %0,%1": "=r" (dcrVal) : "r" (dcrNum) : "memory");
+
+    return dcrVal;
+}
+
+
+/* Write a run-time-selected DCR; the trailing isync makes the write
+ * context-synchronizing before subsequent instructions execute. */
+static inline void mtdcrx(U32 dcrNum,
+                          U32 dcrVal)
+{
+    asm volatile ("mtdcrx %0,%1": :"r" (dcrNum), "r" (dcrVal) : "memory");
+    isync();
+
+    return;
+}
+
+
+/* Full PowerPC storage barrier: msync orders all prior storage
+ * accesses before any later ones; the "memory" clobber is the
+ * matching compiler barrier. */
+static inline void msync(void)
+{
+    asm volatile ("msync" : : : "memory");
+}
+
+
+/* Legacy read_proc handler: print the 32-bit big-endian register at
+ * 'data' as eight hex digits.  Single-shot: *eof is set and
+ * 'off'/'start' are ignored, which suffices for these one-line
+ * register dumps. */
+static inline int e10000_proc_read(char* page,
+                                   char** start,
+                                   off_t off,
+                                   int count,
+                                   int* eof,
+                                   void* data)
+{
+    int rc = 0;
+    int value;
+
+    /* Read the value of the associated address and print it. */
+    value = in_be32(data);
+    rc = snprintf(page, count, "%08x\n", value);
+
+    *eof = 1;
+
+    return rc;
+}
+
+
+/* Legacy write_proc handler: copy a numeric string from user space
+ * (truncated to 127 bytes), parse it with simple_strtoul (base
+ * auto-detected), and store the value to the 32-bit big-endian
+ * register at 'data'.  Returns 'len' or -EFAULT. */
+static inline int e10000_proc_write(struct file* file,
+                                    const char* buffer,
+                                    unsigned long len,
+                                    void* data)
+{
+    unsigned int value;
+    char valStr[128];
+    int strLen = sizeof(valStr)-1;
+
+    /* NOTE(review): signed/unsigned comparison; fine for sane 'len'
+     * but worth confirming len cannot exceed INT_MAX here. */
+    if (strLen > len)
+        strLen = len;
+    if (copy_from_user(valStr, buffer, strLen))
+        return -EFAULT;
+    else if (len) {
+        char* endp;
+
+        /* NULL terminate the string of digits and convert to its numeric value. */
+        if (valStr[strLen-1] == '\n')
+            strLen--;
+        valStr[strLen] = '\0';
+        value = simple_strtoul(valStr, &endp, 0);
+
+        /* Write the value to the associated address. */
+        out_be32(data, value);
+    }
+
+    return len;
+}
+
+
+/* Create a /proc file under 'dir' exposing the device register at
+ * 'addr' through e10000_proc_read/e10000_proc_write.  Returns the
+ * new entry, or NULL on failure. */
+static inline struct proc_dir_entry* e10000_create_proc_entry(struct proc_dir_entry* dir,
+                                                              char* name,
+                                                              void* addr)
+{
+    /* A write handler is installed below, so the file needs a write
+     * permission bit as well: plain S_IRUGO left the write_proc hook
+     * unreachable through the VFS. */
+    struct proc_dir_entry* entry = create_proc_entry(name, S_IRUGO | S_IWUSR, dir);
+    if (entry) {
+        entry->nlink = 1;
+        entry->read_proc = e10000_proc_read;
+        entry->write_proc = e10000_proc_write;
+        entry->data = addr;
+    }
+
+    return entry;
+}
+
+#endif
diff --git a/drivers/net/bgp_e10000/bgp_e10000_main.c b/drivers/net/bgp_e10000/bgp_e10000_main.c
new file mode 100644
index 00000000000000..8bb8ff5d9f5e00
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_e10000_main.c
@@ -0,0 +1,567 @@
+/*
+ * bgp_e10000_main.c: net_device source for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/if_ether.h>
+#include <asm/reg_booke.h>
+#include <linux/proc_fs.h>
+#include <stdarg.h>
+#include <asm/bluegene_ras.h>
+#include <asm/bgp_personality.h>
+#include <asm/bluegene.h>
+
+#include "bgp_e10000.h"
+#include "bgp_emac.h"
+#include "bgp_tomal.h"
+
+
+static int e10000_change_mtu(struct net_device*, int);
+static int e10000_do_ioctl(struct net_device*, struct ifreq*, int);
+static struct net_device_stats* e10000_get_stats(struct net_device*);
+static int e10000_hard_start_xmit(struct sk_buff*, struct net_device*);
+static int e10000_open(struct net_device*);
+//static void e10000_set_multicast_list(struct net_device*);
+static int e10000_stop(struct net_device*);
+static void e10000_tx_timeout(struct net_device*);
+static int e10000_set_mac_address(struct net_device* netDev, void* macAddr);
+static void e10000_link_test(unsigned long);
+
+static struct net_device* e10000NetDev;
+static struct timer_list e10000LinkTimer;
+static const struct net_device_ops e10000NetDevOps = {
+ .ndo_open = e10000_open,
+ .ndo_stop = e10000_stop,
+ .ndo_start_xmit = e10000_hard_start_xmit,
+ .ndo_get_stats = e10000_get_stats,
+ .ndo_set_mac_address = e10000_set_mac_address,
+ .ndo_tx_timeout = e10000_tx_timeout,
+ .ndo_change_mtu = e10000_change_mtu,
+ .ndo_do_ioctl = e10000_do_ioctl,
+};
+
+static BGP_Personality_t bgpers;
+static void* e10000DevMapAddr;
+static unsigned int e10000DevMapLen;
+
+/* Module init: probe for the 10 GbE hardware, map the TOMAL and XEMAC
+ * register blocks, initialize both devices, register the net_device
+ * and prepare the link-supervision timer.  Returns 0 on success or a
+ * negative errno; resources acquired so far are unwound on error. */
+static int __init
+	e10000_init(void)
+{
+    int rc = 0;
+    TOMAL* tomal = NULL;
+    EMAC* emac = NULL;
+    struct proc_dir_entry* e10000Dir;
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry\n");
+
+    /* Determine if Ethernet HW is present. */
+    bluegene_getPersonality((void*) &bgpers, sizeof(bgpers));
+    if (bgpers.Network_Config.RankInPSet) { /* No HW so exit. */
+        rc = -ENODEV;
+        goto end;
+    }
+
+    /* Allocate ethernet device(s). */
+    e10000NetDev = alloc_etherdev(sizeof(EMAC));
+    if (!e10000NetDev) {
+        e10000_printr(bg_subcomp_linux, e10000_ras_netdev_alloc_failure,
+                      "Failure allocating ethernet device.");
+        rc = -ENOMEM;
+        goto end;
+    }
+
+    /* Create /proc directory.  A NULL result is tolerated downstream
+     * (emac_init checks procDir), so failure here is non-fatal. */
+    e10000Dir = proc_mkdir("driver/e10000", NULL);
+
+    /* Create mapping for TOMAL and XEMAC devices. Since they are close in memory one mapping with */
+    /* a small hole in between will cover both. Tell CNS where XEMAC is mapped. */
+    e10000DevMapLen = XEMAC_BASE_ADDRESS + sizeof(XEMACRegs) - TOMAL_BASE_ADDRESS;
+    e10000DevMapAddr = ioremap(TOMAL_BASE_ADDRESS, e10000DevMapLen);
+    if (!e10000DevMapAddr) {
+        rc = -ENODEV;
+        /* Bug fix: jumping straight to 'end' leaked the net_device. */
+        goto free_nd;
+    }
+    rc = bluegene_mapXEMAC(e10000DevMapAddr+(XEMAC_BASE_ADDRESS - TOMAL_BASE_ADDRESS));
+    if (rc) {
+        e10000_printr(bg_subcomp_linux, 0xff, "Failure registering XEMAC mapping with CNS.");
+        rc = -ENODEV;
+        goto unmap_dev;
+    }
+
+    /* Allocate and intialize TOMAL device. */
+    tomal = tomal_init(e10000DevMapAddr, e10000NetDev, CONFIG_BGP_E10000_RXB, CONFIG_BGP_E10000_TXB, NULL,
+                       0, 0, TOMAL_IRQ0, TOMAL_IRQ1, e10000Dir);
+    if (IS_ERR(tomal)) {
+        /* NOTE(review): pointer-to-int cast carries the -errno on this
+         * 32-bit platform; PTR_ERR() would be the canonical spelling. */
+        rc = (int) tomal;
+        goto unmap_dev;
+    }
+
+    /* Initialize XEMAC. */
+    e10000NetDev->irq = XEMAC_IRQ;
+    emac = (EMAC*) netdev_priv(e10000NetDev);
+    rc = emac_init((char*) e10000DevMapAddr + (XEMAC_BASE_ADDRESS - TOMAL_BASE_ADDRESS), emac, EMAC_TYPE_XEMAC,
+                   tomal, 0, e10000NetDev, e10000Dir);
+    if (rc)
+        goto free_tomal;
+
+    /* Initialize network device operations. */
+    e10000NetDev->netdev_ops = &e10000NetDevOps;
+
+    /* Register the net_device. */
+    rc = register_netdev(e10000NetDev);
+    if (rc) {
+        e10000_printr(bg_subcomp_linux, e10000_ras_netdev_reg_failure,
+                      "Failure registering net_device [%p].", e10000NetDev);
+        goto exit_emac;
+    }
+
+    /* Configure EMAC. */
+    rc = emac_configure(emac);
+    if (rc) {
+        e10000_printr(bg_subcomp_e10000, e10000_ras_emac_config_error,
+                      "EMAC configuration error.  rc=%d", rc);
+        goto exit_emac;
+    }
+
+    /* Initialize the timer.  timer_list.data is an unsigned long, so
+     * cast through that type (the old 'unsigned int' cast only worked
+     * by accident on 32-bit). */
+    e10000LinkTimer.function = e10000_link_test;
+    e10000LinkTimer.data = (unsigned long) e10000NetDev;
+    init_timer(&e10000LinkTimer);
+
+    goto end;
+
+exit_emac:
+    emac_exit(emac);
+free_tomal:
+    tomal_exit(tomal);
+unmap_dev:
+    iounmap(e10000DevMapAddr);
+free_nd:
+    free_netdev(e10000NetDev);
+end:
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit rc=0x%x\n", rc);
+
+    return rc;
+}
+
+
+
+/* ndo_set_mac_address handler: validate the new address, copy it into
+ * dev_addr, and push it to the EMAC hardware under the EMAC lock.
+ * Returns 0, -EADDRNOTAVAIL for an invalid address, or the EMAC rc. */
+static int e10000_set_mac_address(struct net_device* netDev, void* macAddr)
+{
+    int rc = -EINVAL;
+    struct sockaddr* sockAddr = (struct sockaddr*) macAddr;
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, macAddr=%p\n",
+           netDev, macAddr);
+
+    if (is_valid_ether_addr(((struct sockaddr*) macAddr)->sa_data)) {
+        EMAC* emac = (EMAC*) netdev_priv(netDev);
+        unsigned long flags;
+
+        memcpy(netDev->dev_addr, sockAddr->sa_data, netDev->addr_len);
+
+        spin_lock_irqsave(&emac->lock, flags);
+        rc = emac_set_mac_address(emac);
+        spin_unlock_irqrestore(&emac->lock, flags);
+    } else
+        rc = -EADDRNOTAVAIL;
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+
+/* ndo_change_mtu handler: range-check the requested MTU against the
+ * driver limits (256..9000, jumbo capable) and store it.
+ * NOTE(review): the new MTU is only recorded while the interface is
+ * running; a change on a downed interface returns 0 but is silently
+ * ignored — confirm this is intended. */
+static int e10000_change_mtu(struct net_device* netDev,
+                             int newMTU)
+{
+    int rc = 0;
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, newMTU=%d\n",
+           netDev, newMTU);
+
+    if (newMTU < BGP_E10000_MIN_MTU || newMTU > BGP_E10000_MAX_MTU) {
+        e10000_printr(bg_subcomp_e10000, e10000_ras_mtu_invalid,
+                      "Invalid MTU of [%d] specified.  Valid MTU "
+                      "values are [%d,%d].\n", newMTU, BGP_E10000_MIN_MTU,
+                      BGP_E10000_MAX_MTU);
+        rc = -EINVAL;
+    } else if (netDev->mtu != newMTU && netif_running(netDev)) {
+/* #ifdef CONFIG_BGP_E10000_NAPI */
+/*        netDev->weight = tomal->maxRxBuffers[channel]; */
+/* #endif */
+        netDev->mtu = newMTU;
+    }
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+/* ndo_do_ioctl handler: no private ioctls are implemented; every
+ * command is accepted and ignored, returning 0. */
+static int e10000_do_ioctl(struct net_device* netDev,
+                           struct ifreq* req,
+                           int cmd)
+{
+    int rc = 0;
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p, req=%p, cmd=0x%x\n",
+           netDev, req, cmd);
+
+//    printk(KERN_CRIT "IOCTL not supported yet\n");
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+/* ndo_get_stats handler: return the statistics block kept in the
+ * EMAC private area of the net_device. */
+static struct net_device_stats* e10000_get_stats(struct net_device* netDev)
+{
+    struct net_device_stats* stats = &((EMAC*) netdev_priv(netDev))->stats;
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\nexit - stats=%p\n",
+           netDev, stats);
+
+    return stats;
+}
+#ifdef CONFIG_BGP_E10000_DBG
+int e10000_diag_count ;
+/* If the 'skb' has fragments (is a scatter-gather one), display them all and the base element too.
+ * Dumps only when data_len >= 4096 or while the externally-set
+ * e10000_diag_count one-shot budget is positive. */
+static void diag_display_sk(struct sk_buff* skb)
+{
+    int nr_frags = skb_shinfo(skb)->nr_frags;
+    if( skb->data_len >= 4096 ||
+        e10000_diag_count > 0)
+    {
+        int f ;
+        if( e10000_diag_count > 0 ) e10000_diag_count -= 1 ;
+        printk(KERN_INFO "diag_display_sk skb=%p nr_frags=%d skb->data=%p skb->len=0x%08x skb->data_len=0x%08x e10000_diag_count=%d\n",
+               skb,nr_frags,skb->data,skb->len,skb->data_len,e10000_diag_count) ;
+        for(f=0;f<nr_frags;f += 1)
+        {
+            struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f];
+            printk(KERN_INFO "    frags[%d]->(page=%p, page_offset=0x%08x, size=0x%08x)\n",
+                   f,frag->page,frag->page_offset,frag->size) ;
+        }
+    }
+}
+#endif
+/* ndo_start_xmit handler: hand 'skb' to the TOMAL channel owned by
+ * this EMAC under the channel TX lock.  Returns NETDEV_TX_OK (stats
+ * updated, trans_start refreshed) or NETDEV_TX_BUSY after stopping
+ * the queue when TOMAL has no room. */
+static int e10000_hard_start_xmit(struct sk_buff* skb,
+                                  struct net_device* netDev)
+{
+    int rc;
+    unsigned long flags;
+    EMAC* emac = netdev_priv(netDev);
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - skb=%p, netDev=%p\n",
+           skb, netDev);
+
+#ifdef CONFIG_BGP_E10000_DBG
+    /* Bug fix: the debug hook was called with the undeclared name
+     * 'sk'; the socket-buffer parameter is 'skb' (this only compiled
+     * with CONFIG_BGP_E10000_DBG off). */
+    if(DBG_SCATTERGATHER & CONFIG_BGP_E10000_DBG_LEVEL ) diag_display_sk(skb) ;
+#endif
+
+    spin_lock_irqsave(&emac->tomal->txLock[emac->channel], flags);
+    rc = tomal_xmit_tx_buffer(emac->tomal, emac->channel, skb);
+    if (likely(!rc)) {
+        emac->stats.tx_packets++;
+        emac->stats.tx_bytes += skb->len;
+        rc = NETDEV_TX_OK;
+        netDev->trans_start = jiffies;
+    } else {
+        netif_stop_queue(netDev);
+        rc = NETDEV_TX_BUSY;
+    }
+    spin_unlock_irqrestore(&emac->tomal->txLock[emac->channel], flags);
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+
+/* ndo_open handler: program the MAC address from the personality,
+ * reset TOMAL and the PHY, wait for link, configure the EMAC, enable
+ * TX/RX paths and IRQs, start the queue and arm the link timer.
+ * All HW setup runs under the TOMAL RX/TX and EMAC locks. */
+static int e10000_open(struct net_device* netDev)
+{
+    int rc = 0;
+    EMAC* emac = (EMAC*) netdev_priv(netDev);
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev);
+
+    if (!emac->opened) {
+        U32 linkTimer;
+        U8 rxLink, txLink;
+        struct sockaddr sockAddr;
+
+        /* Set the MAC address for this interface. */
+        memcpy(sockAddr.sa_data, bgpers.Ethernet_Config.EmacID, netDev->addr_len);
+        e10000_set_mac_address(netDev, &sockAddr);
+
+        /* Acquire locks for EMAC and TOMAL. */
+        spin_lock(&emac->tomal->rxLock[emac->channel]);
+        spin_lock(&emac->tomal->txLock[emac->channel]);
+        spin_lock(&emac->lock);
+
+        emac->opened = 1;
+
+#ifndef CONFIG_BGP_E10000_EMAC_LOOPBACK
+        /* Reset TOMAL */
+        tomal_soft_reset(emac->tomal);
+
+        /* PHY reset. */
+        rc = bluegene_macResetPHY();
+        if (rc) {
+            e10000_printr(bg_subcomp_e10000, e10000_ras_phy_reset_error,
+                          "%s:  PHY reset error.", netDev->name);
+            spin_unlock(&emac->lock);
+            spin_unlock(&emac->tomal->txLock[emac->channel]);
+            spin_unlock(&emac->tomal->rxLock[emac->channel]);
+            goto exit;
+        }
+
+        /* Wait for link to be ready.  We wait less time for a single ION so that */
+        /* we timeout before the control system does. */
+        /* NOTE(review): udelay(100000) (100 ms) exceeds udelay's
+         * intended range; mdelay(100) would be usual — and if the
+         * link comes up on the final pass, linkTimer is already 0
+         * and the success is reported as "no link".  Confirm. */
+        linkTimer = 240;
+        for (txLink = 0, rxLink = 0; linkTimer && (!txLink || !rxLink); linkTimer--) {
+            txLink = bluegene_macTestTxLink();
+            rxLink = bluegene_macTestRxLink();
+            udelay(100000);
+        }
+        printk(KERN_NOTICE "%s: Link status [RX%c,TX%c]\n", netDev->name,
+               rxLink ? '+' : '-', txLink ? '+' : '-');
+        if (!linkTimer) {
+            e10000_printr(bg_subcomp_e10000, e10000_ras_link_error,
+                          "%s:  No link detected.", netDev->name);
+            spin_unlock(&emac->lock);
+            spin_unlock(&emac->tomal->txLock[emac->channel]);
+            spin_unlock(&emac->tomal->rxLock[emac->channel]);
+            goto exit;
+        }
+#endif
+
+        /* Configure EMAC. */
+        rc = emac_configure(emac);
+        if (rc) {
+            e10000_printr(bg_subcomp_e10000, e10000_ras_emac_config_error,
+                          "EMAC configuration error.  rc=%d", rc);
+            spin_unlock(&emac->lock);
+            spin_unlock(&emac->tomal->txLock[emac->channel]);
+            spin_unlock(&emac->tomal->rxLock[emac->channel]);
+            goto exit;
+        }
+
+        /* Enable TX and RX for TOMAL and EMAC. */
+        tomal_rx_tx_enable(emac->tomal);
+        emac_rx_enable(emac);
+        emac_tx_enable(emac);
+
+        /* Enable IRQs. */
+        tomal_irq_enable(emac->tomal, emac->channel);
+        emac_irq_enable(emac);
+
+        /* Release the locks. */
+        spin_unlock(&emac->lock);
+        spin_unlock(&emac->tomal->txLock[emac->channel]);
+        spin_unlock(&emac->tomal->rxLock[emac->channel]);
+
+        /* Start the queues. */
+        netif_start_queue(netDev);
+
+        /* Start link timer. */
+        mod_timer(&e10000LinkTimer, jiffies + HZ);
+    }
+exit:
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+
+/* Link-supervision timer callback (runs every HZ jiffies): poll the
+ * RX/TX link state.  Sends a non-fatal RAS event when the link first
+ * drops and a fatal one after 30 consecutive lost polls; the counter
+ * resets whenever the link is present.  Re-arms itself. */
+static void e10000_link_test(unsigned long data)
+{
+    struct net_device* netDev = (struct net_device*) data;
+    static unsigned int linkLossCount = 0;
+    u8 txLink = bluegene_macTestTxLink();
+    u8 rxLink = bluegene_macTestRxLink();
+
+    if (!txLink || !rxLink) {
+        /* Link gone.  Have we reached the threshold where we are going to send a fatal event? */
+        if (linkLossCount == 30)
+            e10000_printr(bg_subcomp_e10000, e10000_ras_link_error,
+                          "%s: Link error detected.  Link status [RX%c,TX%c]\n", netDev->name,
+                          rxLink ? '+' : '-', txLink ? '+' : '-');
+        else if (linkLossCount == 0)
+            /* Send non-fatal RAS when the link first disappears. */
+            e10000_printr(bg_subcomp_e10000, e10000_ras_link_loss,
+                          "%s: Loss of link detected.  Link status [RX%c,TX%c]\n", netDev->name,
+                          rxLink ? '+' : '-', txLink ? '+' : '-');
+
+        linkLossCount++;
+    } else
+        /* Link present.  Reset counter. */
+        linkLossCount = 0;
+
+    mod_timer(&e10000LinkTimer, jiffies + HZ);
+
+    return;
+}
+
+
+//static void e10000_set_multicast_list(struct net_device* netDev)
+//{
+// PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev);
+//
+// emac_set_multicast_list((EMAC*) netdev_priv(netDev));
+//
+// PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n");
+//
+// return;
+//}
+
+
+/* ndo_stop handler: quiesce the interface — stop the link timer and
+ * queue, disable EMAC/TOMAL TX, RX and IRQs — under the TOMAL and
+ * EMAC locks.  Always returns 0.
+ * NOTE(review): del_timer_sync() is called with spinlocks held and
+ * IRQs disabled, and local_bh_disable() is nested inside the irqsave
+ * region with the matching local_bh_enable() after the unlocks —
+ * confirm this ordering is safe on this (UP) platform. */
+static int e10000_stop(struct net_device* netDev)
+{
+    int rc = 0;
+    EMAC* emac = (EMAC*) netdev_priv(netDev);
+    unsigned long tomalRxFlags;
+    unsigned long tomalTxFlags;
+    unsigned long emacFlags;
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev);
+
+    /* Acquire locks for EMAC and TOMAL. */
+    spin_lock_irqsave(&emac->tomal->rxLock[emac->channel], tomalRxFlags);
+    spin_lock_irqsave(&emac->tomal->txLock[emac->channel], tomalTxFlags);
+    spin_lock_irqsave(&emac->lock, emacFlags);
+
+    local_bh_disable();
+    del_timer_sync(&e10000LinkTimer);
+    netif_stop_queue(netDev);
+
+    emac->opened = 0;
+    emac_rx_disable(emac);
+    emac_tx_disable(emac);
+    emac_irq_disable(emac);
+    tomal_rx_tx_disable(emac->tomal);
+    tomal_irq_disable(emac->tomal, emac->channel);
+
+    /* Release locks for EMAC and TOMAL. */
+    spin_unlock_irqrestore(&emac->lock, emacFlags);
+    spin_unlock_irqrestore(&emac->tomal->txLock[emac->channel], tomalTxFlags);
+    spin_unlock_irqrestore(&emac->tomal->rxLock[emac->channel], tomalRxFlags);
+
+    local_bh_enable();
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+/* ndo_tx_timeout handler: report a RAS event, bump tx_errors, and
+ * bounce the interface (stop + open) to try to recover the HW. */
+static void e10000_tx_timeout(struct net_device* netDev)
+{
+    EMAC* emac = (EMAC*) netdev_priv(netDev);
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry - netDev=%p\n", netDev);
+
+    e10000_printr(bg_subcomp_e10000, e10000_ras_tx_timeout,
+                  "Transmission timeout at %u, elapsed time %u\n",
+                  (U32) jiffies, (U32)(jiffies - netDev->trans_start));
+    emac->stats.tx_errors++;
+
+    /* Attempt to reset the interface. */
+    e10000_stop(netDev);
+    e10000_open(netDev);
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n");
+
+    return;
+}
+
+
+/* Module exit: tear down TOMAL and EMAC, unmap the MMIO window, and
+ * unregister/free the net_device.
+ * NOTE(review): netdev_priv() is dereferenced before the later
+ * 'if (e10000NetDev)' NULL guard, and sub-devices are torn down
+ * before unregister_netdev() — confirm module load/unload ordering
+ * makes both safe. */
+static void e10000_exit(void)
+{
+    EMAC* emac = netdev_priv(e10000NetDev);
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "entry\n");
+
+    /* Allow the HW to clean up. */
+    if (emac) {
+        if (emac->tomal)
+            tomal_exit(emac->tomal);
+        emac_exit(emac);
+    }
+
+    /* Unmap HW. */
+    if (e10000DevMapAddr)
+        iounmap(e10000DevMapAddr);
+
+    /* Unregister and free the net_device. */
+    if (e10000NetDev) {
+        unregister_netdev(e10000NetDev);
+        free_netdev(e10000NetDev);
+    }
+
+    PRINTK(DBG_E10000 | DBG_LEVEL2, "exit\n");
+
+    return;
+}
+
+
+extern int bgWriteRasStr(unsigned int component,
+ unsigned int subcomponent,
+ unsigned int errCode,
+ char* str,
+ unsigned int strLen);
+
+/* Format a RAS (reliability/availability/serviceability) message,
+ * log it via printk, and forward it to the control system through
+ * bgWriteRasStr. */
+void e10000_printr(U16 subComponent,
+                   U16 id,
+                   char* format,
+                   ...)
+{
+    va_list args;
+    int n;
+    char text[BG_RAS_DATA_MAX];
+
+    va_start(args, format);
+    /* vsnprintf NUL-terminates within the buffer but returns the
+     * would-be (untruncated) length; the old code indexed text[n]
+     * with that raw value, writing past the buffer whenever the
+     * message was truncated.  Clamp 'n' before terminating. */
+    n = vsnprintf(text, sizeof(text), format, args);
+    va_end(args);
+    if (n < 0)
+        n = 0;
+    else if (n >= (int) sizeof(text))
+        n = sizeof(text) - 1;
+
+    text[n] = '\0';
+    printk(KERN_WARNING "%s\n", text);
+    bgWriteRasStr(bg_comp_kernel, subComponent, id, text, 0);
+
+    return;
+}
+
+
+module_init(e10000_init);
+module_exit(e10000_exit);
+
+
+
+MODULE_DESCRIPTION("10Gb Ethernet Driver for BlueGene");
+MODULE_VERSION("2.0");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Andrew Tauferner");
diff --git a/drivers/net/bgp_e10000/bgp_emac.c b/drivers/net/bgp_e10000/bgp_emac.c
new file mode 100644
index 00000000000000..1afa02e1e01536
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_emac.c
@@ -0,0 +1,282 @@
+/*
+ * bgp_emac.c: XEMAC device for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#include "bgp_emac.h"
+#include "bgp_e10000.h"
+
+
+/* XEMAC register map exposed through /proc/driver/e10000/xemac/hw:
+ * one file per register, named after the register, with 'addr'
+ * holding the byte offset from the XEMAC base.  NULL-name sentinel
+ * terminates the table. */
+static E10000_PROC_ENTRY emac_hw_proc_entry[] = {
+    { "mode0",                      (void*) 0x00, NULL },
+    { "mode1",                      (void*) 0x04, NULL },
+    { "txMode0",                    (void*) 0x08, NULL },
+    { "txMode1",                    (void*) 0x0c, NULL },
+    { "rxMode",                     (void*) 0x10, NULL },
+    { "interruptStatus",            (void*) 0x14, NULL },
+    { "interruptStatusEnable",      (void*) 0x18, NULL },
+    { "individualAddrH",            (void*) 0x1c, NULL },
+    { "individualAddrL",            (void*) 0x20, NULL },
+    { "vlanTPID",                   (void*) 0x24, NULL },
+    { "vlanTCI",                    (void*) 0x28, NULL },
+    { "pauseTimerValue",            (void*) 0x2c, NULL },
+    { "individualAddrHashTable0",   (void*) 0x30, NULL },
+    { "individualAddrHashTable1",   (void*) 0x34, NULL },
+    { "individualAddrHashTable2",   (void*) 0x38, NULL },
+    { "individualAddrHashTable3",   (void*) 0x3c, NULL },
+    { "groupAddrHashTable0",        (void*) 0x40, NULL },
+    { "groupAddrHashTable1",        (void*) 0x44, NULL },
+    { "groupAddrHashTable2",        (void*) 0x48, NULL },
+    { "groupAddrHashTable3",        (void*) 0x4c, NULL },
+    { "lastSourceAddrH",            (void*) 0x50, NULL },
+    { "lastSourceAddrL",            (void*) 0x54, NULL },
+    { "interPacketGapValue",        (void*) 0x58, NULL },
+    { "staCtrl",                    (void*) 0x5c, NULL },
+    { "txRequestThreshold",         (void*) 0x60, NULL },
+    { "rxLowHighWaterMark",         (void*) 0x64, NULL },
+    { "sopCommandMode",             (void*) 0x68, NULL },
+    { "secondaryIndividualAddrH",   (void*) 0x6c, NULL },
+    { "secondaryIndividualAddrL",   (void*) 0x70, NULL },
+    { "txOctetsCounter1",           (void*) 0x74, NULL },
+    { "txOctetsCounter2",           (void*) 0x78, NULL },
+    { "rxOctetsCounter1",           (void*) 0x7c, NULL },
+    { "rxOctetsCounter2",           (void*) 0x80, NULL },
+    { "revisionID",                 (void*) 0x84, NULL },
+    { "hwDebug",                    (void*) 0x88, NULL },
+    { NULL,                         0,            NULL }
+};
+
+
+
+/* XEMAC interrupt handler: read the interrupt status register, map
+ * each error bit onto the matching net_device_stats counter, then ack
+ * by writing the observed bits back to interruptStatus.  Raises a RAS
+ * event for interrupts with no recognized status bit. */
+static irqreturn_t emac_irq(int irq,
+                            void* data)
+{
+    struct net_device* netDev = (struct net_device*) data;
+    EMAC* emac = (EMAC*) netdev_priv(netDev);
+    U32 isr = in_be32(&emac->regs->interruptStatus);
+    irqreturn_t rc = IRQ_NONE;
+
+    if (irq == netDev->irq) {
+        if ((isr & XEMAC_IS_TXPE) || (isr & XEMAC_IS_DB) || (isr & XEMAC_IS_TE)) {
+            rc = IRQ_HANDLED;
+            emac->stats.tx_errors++;
+        }
+        if (isr & XEMAC_IS_RXPE) {
+            rc = IRQ_HANDLED;
+            emac->stats.rx_errors++;
+        }
+        if (isr & XEMAC_IS_TFEI) {
+            rc = IRQ_HANDLED;
+            emac->stats.tx_errors++;
+            emac->stats.tx_fifo_errors++;
+        }
+        if (isr & XEMAC_IS_RFFI) {
+            rc = IRQ_HANDLED;
+            emac->stats.rx_errors++;
+            emac->stats.rx_over_errors++;
+        }
+        if (isr & XEMAC_IS_OVR) {
+            rc = IRQ_HANDLED;
+            emac->stats.rx_errors++;
+            emac->stats.rx_over_errors++;
+        }
+        if ((isr & XEMAC_IS_PSF) || (isr & XEMAC_IS_RTF) || (isr & XEMAC_IS_IRE)) { /* pause or runt frame or in range error? */
+            rc = IRQ_HANDLED;
+        }
+        if (isr & XEMAC_IS_BDF) {
+            rc = IRQ_HANDLED;
+            emac->stats.rx_errors++;
+            emac->stats.rx_frame_errors++;
+        }
+        if (isr & XEMAC_IS_LF) {
+            rc = IRQ_HANDLED;
+            emac->stats.rx_errors++;
+        }
+        if (isr & XEMAC_IS_BFCS) {
+            rc = IRQ_HANDLED;
+            emac->stats.rx_errors++;
+            emac->stats.rx_crc_errors++;
+        }
+        if ((isr & XEMAC_IS_FTL) || (isr & XEMAC_IS_ORE)) {
+            rc = IRQ_HANDLED;
+            emac->stats.rx_errors++;
+            emac->stats.rx_length_errors++;
+        }
+
+        /* Write-back acks the bits we just serviced. */
+        out_be32(&emac->regs->interruptStatus, isr);
+    }
+
+    if (rc != IRQ_HANDLED)
+        e10000_printr(bg_subcomp_xemac, emac_ras_irq_unknown,
+                      "Spurious interrupt - irq=%d, isr=0x%08x.", irq, isr);
+
+    return rc;
+}
+
+/* Initialize an EMAC instance: record the pre-mapped register block,
+ * build the /proc/driver/e10000/xemac/hw tree, request the IRQ, and
+ * wire the EMAC to its TOMAL channel and net_device.  Returns 0 or a
+ * negative errno.
+ * NOTE(review): if request_irq() fails, the /proc entries created
+ * above are not removed — confirm the caller path tolerates this. */
+int __init emac_init(void* devMapAddr,
+                     EMAC* emac,
+                     U32 type,
+                     TOMAL* tomal,
+                     U8 channel,
+                     struct net_device* netDev,
+                     struct proc_dir_entry* procDir)
+{
+    int rc = -EINVAL;
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p, type=%d, tomal=%p, netDev=%p\n", emac, type,
+           tomal, netDev);
+
+    emac->type = type;
+    switch (type) {
+        case EMAC_TYPE_XEMAC: {
+            emac->regs = (XEMACRegs*) devMapAddr;
+            if (!emac->regs) {
+                e10000_printr(bg_subcomp_xemac, emac_ras_ioremap_error,
+                              "Failure mapping XEMAC registers.");
+                rc = -ENXIO;
+                goto out;
+            }
+
+            /* Create /proc/driver/e10000/xemac/hw */
+            if (procDir) {
+                emac->parentDir = procDir;
+                emac->emacDir = proc_mkdir("xemac", procDir);
+                if (emac->emacDir) {
+                    emac->hwDir = proc_mkdir("hw", emac->emacDir);
+                    if (emac->hwDir) {
+                        E10000_PROC_ENTRY* entry = emac_hw_proc_entry;
+
+                        while (entry->name) {
+                            entry->entry = e10000_create_proc_entry(emac->hwDir, entry->name,
+                                                                    (void*) ((U32) emac->regs + (U32) entry->addr));
+                            if (!entry->entry)
+                                printk(KERN_EMERG "Failure creating /proc entry %s\n", entry->name);
+
+                            entry++;
+                        }
+                    }
+                }
+            }
+            break;
+        }
+
+        default:
+            e10000_printr(bg_subcomp_xemac, e10000_ras_internal_error,
+                          "Invalid EMAC type [%d].", type);
+            goto out;
+    }
+
+#ifndef CONFIG_BGP_E10000_EMAC_LOOPBACK
+    /* Initialize the PHY. */
+    emac->phy.phy_id = 0;
+    emac->phy.full_duplex = 1;
+    emac->phy.dev = netDev;
+#endif
+
+    /* Request IRQ. */
+    rc = request_irq(netDev->irq, emac_irq, IRQF_DISABLED, "BGP EMAC IRQ", (void*) netDev);
+    if (rc) {
+        e10000_printr(bg_subcomp_xemac, emac_ras_irq_not_available,
+                      "Failure requesting IRQ [%d] - rc = %d", netDev->irq, rc);
+        goto out;
+    }
+
+    emac->tomal = tomal;
+    emac->channel = channel;
+    emac->netDev = netDev;
+    memset(&emac->stats, 0, sizeof(emac->stats));
+    spin_lock_init(&emac->lock);
+    emac->opened = 0;
+
+    /* Redundant jump (falls straight through to 'out'); kept as-is. */
+    goto out;
+
+out:
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "exit rc=%d\n", rc);
+
+    return rc;
+}
+
+
+/* Program the XEMAC mode/RX/TX registers: jumbo frames, 8K FIFOs,
+ * pause support, promiscuous-multicast RX (so pause frames are not
+ * dropped), watermarks and thresholds.  Loopback vs. SDR operation
+ * is selected at build time.  Always returns 0 for known types. */
+int emac_configure(EMAC* emac)
+{
+    int rc = 0;
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac);
+
+    switch (emac->type) {
+        case EMAC_TYPE_XEMAC: {
+            XEMACRegs* reg = (XEMACRegs*) emac->regs;
+            U32 mode1 = XEMAC_MODE1_TRQ | XEMAC_MODE1_RFS8K |
+                        XEMAC_MODE1_TFS8K | XEMAC_MODE1_JBEN |
+                        XEMAC_MODE1_PSEN | XEMAC_MODE1_IFEN |
+                        XEMAC_MODE1_OPB133MHZ | 0x00001000;
+            U32 rxMode = XEMAC_RX_MODE_SPAD | XEMAC_RX_MODE_SFCS | XEMAC_RX_MODE_PMME |
+                         XEMAC_RX_MODE_MAE | XEMAC_RX_MODE_IAE | XEMAC_RX_MODE_BAE | XEMAC_RX_MODE_LFD |
+                         XEMAC_RX_MODE_RFAF_16_32;
+
+            /* We must accept multicast frames so that pause frames aren't discarded. */
+            /* This means that EMAC must have multicast mode enabled and promiscuous multicast */
+            /* mode enabled. */
+            if (emac->netDev->flags & IFF_PROMISC)
+                rxMode |= XEMAC_RX_MODE_PME;
+            out_be32(&reg->rxMode, rxMode);
+            out_be32(&reg->rxLowHighWaterMark, 0x00800100);
+            out_be32(&reg->pauseTimerValue, 0x1000);
+
+#ifdef CONFIG_BGP_E10000_EMAC_LOOPBACK
+            mode1 |= XEMAC_MODE1_LPEN;
+#else
+            mode1 |= XEMAC_MODE1_SDR;
+#endif
+            out_be32(&reg->mode1, mode1);
+            out_be32(&reg->txMode1, 0x02200240);
+            out_be32(&reg->txRequestThreshold, 0x17000000);
+            break;
+        }
+    }
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+/* Tear down an EMAC instance: remove every /proc register file and
+ * the xemac/hw directories created by emac_init, then release the
+ * IRQ registered against the net_device. */
+void emac_exit(EMAC* emac)
+{
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "entry\n");
+
+    /* Remove /proc entries. */
+    if (emac->emacDir) {
+        if (emac->hwDir) {
+            E10000_PROC_ENTRY* entry = emac_hw_proc_entry;
+
+            while (entry->name) {
+                if (entry->entry) {
+                    remove_proc_entry(entry->entry->name, emac->emacDir);
+                    entry->entry = NULL;
+                }
+                entry++;
+            }
+
+            remove_proc_entry(emac->hwDir->name, emac->emacDir);
+            emac->hwDir = NULL;
+        }
+        remove_proc_entry(emac->emacDir->name, emac->parentDir);
+        emac->emacDir = NULL;
+    }
+
+    /* Free the IRQ. */
+    free_irq(emac->netDev->irq, (void*) emac->netDev);
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "exit\n");
+
+    return;
+}
diff --git a/drivers/net/bgp_e10000/bgp_emac.h b/drivers/net/bgp_e10000/bgp_emac.h
new file mode 100644
index 00000000000000..3072b127ad621d
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_emac.h
@@ -0,0 +1,356 @@
+/*
+ * bgp_emac.h: XEMAC definition for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef _BGP_EMAC_H
+#define _BGP_EMAC_H
+
+#include <linux/netdevice.h>
+#include <linux/mii.h>
+#include <linux/delay.h>
+#include <linux/crc32.h>
+#include <linux/proc_fs.h>
+#include <asm/bluegene.h>
+#include <asm/bluegene_ras.h>
+
+#include "bgp_tomal.h"
+#include "bgp_e10000.h"
+
+
+#define XEMAC_IRQ_GROUP 9
+#define XEMAC_IRQ_GINT 0
+#define XEMAC_IRQ bic_hw_to_irq(XEMAC_IRQ_GROUP, XEMAC_IRQ_GINT)
+
+#define XEMAC_BASE_ADDRESS 0x720004000ULL
+
+
+
+/* Memory-mapped register layout of the XEMAC 10GbE MAC core.  Declared
+ * volatile because it overlays device MMIO space; each field's hex offset
+ * from the block base is given in its trailing comment.  Bit masks for a
+ * register are #defined immediately after the register they apply to. */
+typedef volatile struct _XEMACRegs {            /* Offset  Description */
+    U32 mode0;                                  /* 00      mode register 0 */
+#define XEMAC_MODE0_RXIDL       0x80000000
+#define XEMAC_MODE0_TXIDL       0x40000000
+#define XEMAC_MODE0_SRST        0x20000000
+#define XEMAC_MODE0_TXEN        0x10000000
+#define XEMAC_MODE0_RXEN        0x08000000
+#define XEMAC_MODE0_WUEN        0x04000000
+    U32 mode1;                                  /* 04      mode register 1 */
+#define XEMAC_MODE1_SDR         0x80000000
+#define XEMAC_MODE1_LPEN        0x40000000
+#define XEMAC_MODE1_VLEN        0x20000000
+#define XEMAC_MODE1_IFEN        0x10000000
+#define XEMAC_MODE1_PSEN        0x08000000
+#define XEMAC_MODE1_RFS2K       0x00100000
+#define XEMAC_MODE1_RFS4K       0x00180000
+#define XEMAC_MODE1_RFS8K       0x00200000
+#define XEMAC_MODE1_RFS16K      0x00280000
+#define XEMAC_MODE1_RFS32K      0x00300000
+#define XEMAC_MODE1_RFS64K      0x00380000
+#define XEMAC_MODE1_TFS2K       0x00020000
+#define XEMAC_MODE1_TFS4K       0x00030000
+#define XEMAC_MODE1_TFS8K       0x00040000
+#define XEMAC_MODE1_TFS16K      0x00050000
+#define XEMAC_MODE1_TFS32K      0x00060000
+#define XEMAC_MODE1_TRQ         0x00008000
+#define XEMAC_MODE1_JBEN        0x00000800
+#define XEMAC_MODE1_OPB66MHZ    0x00000008
+#define XEMAC_MODE1_OPB83MHZ    0x00000010
+#define XEMAC_MODE1_OPB100MHZ   0x00000018
+#define XEMAC_MODE1_OPB133MHZ   0x00000020
+    U32 txMode0;                                /* 08      TX mode register 0 */
+#define XEMAC_TX_MODE0_GNP          0x80000000
+#define XEMAC_TX_MODE0_TFAE_2_4     0x00000001
+#define XEMAC_TX_MODE0_TFAE_4_8     0x00000002
+#define XEMAC_TX_MODE0_TFAE_8_16    0x00000003
+#define XEMAC_TX_MODE0_TFAE_16_32   0x00000004
+#define XEMAC_TX_MODE0_TFAE_32_64   0x00000005
+#define XEMAC_TX_MODE0_TFAE_64_128  0x00000006
+#define XEMAC_TX_MODE0_TFAE_128_256 0x00000007
+    U32 txMode1;                                /* 0C      TX mode register 1 */
+    U32 rxMode;                                 /* 10      RX mode register */
+#define XEMAC_RX_MODE_SPAD      0x80000000
+#define XEMAC_RX_MODE_SFCS      0x40000000
+#define XEMAC_RX_MODE_ARRF      0x20000000
+#define XEMAC_RX_MODE_ARFE      0x10000000
+#define XEMAC_RX_MODE_LFD       0x08000000
+#define XEMAC_RX_MODE_ARIE      0x04000000
+#define XEMAC_RX_MODE_PPF       0x02000000
+#define XEMAC_RX_MODE_PME       0x01000000
+#define XEMAC_RX_MODE_PMME      0x00800000
+#define XEMAC_RX_MODE_IAE       0x00400000
+#define XEMAC_RX_MODE_MIAE      0x00200000
+#define XEMAC_RX_MODE_BAE       0x00100000
+#define XEMAC_RX_MODE_MAE       0x00080000
+#define XEMAC_RX_MODE_PUME      0x00040000
+#define XEMAC_RX_MODE_SIAE      0x00020000
+#define XEMAC_RX_MODE_RFAF_2_4      0x00000001
+#define XEMAC_RX_MODE_RFAF_4_8      0x00000002
+#define XEMAC_RX_MODE_RFAF_8_16     0x00000003
+#define XEMAC_RX_MODE_RFAF_16_32    0x00000004
+#define XEMAC_RX_MODE_RFAF_32_64    0x00000005
+#define XEMAC_RX_MODE_RFAF_64_128   0x00000006
+    U32 interruptStatus;                        /* 14      interrupt status register */
+#define XEMAC_IS_TXPE           0x20000000
+#define XEMAC_IS_RXPE           0x10000000
+#define XEMAC_IS_TFEI           0x08000000
+#define XEMAC_IS_RFFI           0x04000000
+#define XEMAC_IS_OVR            0x02000000
+#define XEMAC_IS_PSF            0x01000000
+#define XEMAC_IS_BDF            0x00800000
+#define XEMAC_IS_RTF            0x00400000
+#define XEMAC_IS_LF             0x00200000
+#define XEMAC_IS_BFCS           0x00080000
+#define XEMAC_IS_FTL            0x00040000
+#define XEMAC_IS_ORE            0x00020000
+#define XEMAC_IS_IRE            0x00010000
+#define XEMAC_IS_DB             0x00000100
+#define XEMAC_IS_TE             0x00000040
+#define XEMAC_IS_MMS            0x00000002
+#define XEMAC_IS_MMF            0x00000001
+    U32 interruptStatusEnable;                  /* 18      interrupt status enable register */
+    U32 individualAddrH;                        /* 1C      bits 0-15 of main station unique address */
+    U32 individualAddrL;                        /* 20      bits 16-47 of main station unique address */
+    U32 vlanTPID;                               /* 24      VLAN tag ID */
+    U32 vlanTCI;                                /* 28      VLAN TCI register */
+    U32 pauseTimerValue;                        /* 2C      pause timer register */
+    U32 individualAddrHashTable[4];             /* 30      individual addr. hash registers */
+    U32 groupAddrHashTable[4];                  /* 40      group addr. hash register 1 */
+    U32 lastSourceAddrH;                        /* 50      bits 0-15 of last source address */
+    U32 lastSourceAddrL;                        /* 54      bits 16-47 of last source address */
+    U32 interPacketGapValue;                    /* 58      inter packet gap register */
+    U32 staCtrl;                                /* 5C      STA control register */
+#define XEMAC_STAC_MGO          0x00008000
+#define XEMAC_STAC_PHE          0x00004000
+#define XEMAC_STAC_IM           0x00002000
+#define XEMAC_STAC_MII_READ     0x00001000
+#define XEMAC_STAC_MII_WRITE    0x00000800
+#define XEMAC_STAC_MDIO_ADDRESS 0x00002000
+#define XEMAC_STAC_MDIO_WRITE   0x00002800
+#define XEMAC_STAC_MDIO_READ    0x00003800
+#define XEMAC_STAC_MDIO_READ_INC 0x00003000
+    U32 txRequestThreshold;                     /* 60      TX request threshold register */
+#define XEMAC_TRT_64            0x00000000
+#define XEMAC_TRT_128           0x01000000
+#define XEMAC_TRT_192           0x02000000
+#define XEMAC_TRT_256           0x03000000
+    U32 rxLowHighWaterMark;                     /* 64      RX high/low water mark register */
+    U32 sopCommandMode;                         /* 68      SOP command mode register */
+    U32 secondaryIndividualAddrH;               /* 6C      bits 0-15 of sec. individual addr. reg */
+    U32 secondaryIndividualAddrL;               /* 70      bits 16-47 of sec. individual addr. reg */
+    U32 txOctetsCounter1;                       /* 74      bits 0-31 of total TX octets (read first) */
+    U32 txOctetsCounter2;                       /* 78      bits 32-63 of total TX octets (read last) */
+    U32 rxOctetsCounter1;                       /* 7C      bits 0-31 of total RX octets (read first) */
+    U32 rxOctetsCounter2;                       /* 80      bits 32-63 of total RX octets (read last) */
+    U32 revisionID;                             /* 84      revision ID */
+    U32 hwDbg;                                  /* 88      hardware debug register */
+} XEMACRegs;
+
+
+
+
+/* Per-interface EMAC state: MMIO register pointer, the owning TOMAL engine,
+ * MII/PHY management info, netdev glue and the /proc entries created for
+ * this interface. */
+typedef struct _EMAC {
+    U32 type;                           /* MAC flavour; one of EMAC_TYPE_* */
+#define EMAC_TYPE_EMAC4  4
+#define EMAC_TYPE_XEMAC  10
+    XEMACRegs* regs;                    /* mapped XEMAC register block */
+    TOMAL* tomal;                       /* associated TOMAL DMA engine */
+    U8 channel;                         /* TOMAL channel used by this MAC */
+    struct mii_if_info phy;             /* MII/PHY management state */
+    struct net_device* netDev;          /* backing network device */
+    struct net_device_stats stats;      /* interface statistics */
+    spinlock_t lock;
+    U8 opened;                          /* non-zero once the device is open */
+    struct proc_dir_entry* parentDir;   /* parent /proc directory */
+    struct proc_dir_entry* emacDir;     /* this EMAC's /proc directory */
+    struct proc_dir_entry* hwDir;       /* hw-register /proc subdirectory */
+
+} EMAC;
+
+
+/* RAS (reliability/availability/serviceability) event identifiers reported
+ * for the EMAC subcomponent via e10000_printr(). */
+typedef enum {
+    emac_ras_none               = 0x00,
+    emac_ras_timeout            = 0x01,
+    emac_ras_ioremap_error      = 0x02,
+    emac_ras_irq_not_available  = 0x03,
+    emac_ras_sta_addr_error     = 0x04,
+    emac_ras_sta_read_error     = 0x05,
+    emac_ras_sta_write_error    = 0x06,
+    emac_ras_irq_unknown        = 0x07,
+
+    emac_ras_internal_error     = 0xfe,
+    emac_ras_max                = 0xff
+} emac_ras_id;
+
+/* RAS event identifiers reported for the PHY subcomponent. */
+typedef enum {
+    phy_ras_none        = 0x00,
+    phy_ras_timeout     = 0x01,
+    phy_ras_not_found   = 0x02,
+
+    phy_ras_max         = 0xff
+} phy_ras_id;
+
+
+int __init emac_init(void* devMapAddr,
+ EMAC* emac,
+ U32 type,
+ TOMAL* tomal,
+ U8 channel,
+ struct net_device* netDev,
+ struct proc_dir_entry* procDir);
+
+int emac_configure(EMAC* emac);
+
+
+
+
+/* Soft-reset the XEMAC: set the SRST bit and poll (200 x 10ms, ~2s total)
+ * for the hardware to clear it.  Returns 0 on success, -ETIME if the MAC
+ * never leaves reset (a RAS event is logged in that case). */
+static inline int emac_soft_reset(EMAC* emac)
+{
+    int rc = 0;
+    U32 i;
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac);
+
+    /* Set the reset bit and wait for it to clear. */
+    out_be32(&emac->regs->mode0, XEMAC_MODE0_SRST);
+    for (i = 200; (in_be32(&emac->regs->mode0) & XEMAC_MODE0_SRST) && i; i--)
+        mdelay(10);     /* was udelay(10000): udelay is documented for
+                           sub-millisecond waits only; use mdelay for 10ms */
+    if (!i) {
+        e10000_printr(bg_subcomp_xemac, emac_ras_timeout,
+                      "XEMAC failed reset");
+        rc = -ETIME;
+    }
+
+    return rc;
+}
+
+
+
+/* Turn on the receiver by setting RXEN in mode0 (read-modify-write). */
+static inline int emac_rx_enable(EMAC* emac)
+{
+    XEMACRegs* regs = emac->regs;
+
+    out_be32(&regs->mode0, in_be32(&regs->mode0) | XEMAC_MODE0_RXEN);
+
+    return 0;
+}
+
+
+/* Turn off the receiver by clearing RXEN in mode0 (read-modify-write). */
+static inline int emac_rx_disable(EMAC* emac)
+{
+    XEMACRegs* regs = emac->regs;
+
+    out_be32(&regs->mode0, in_be32(&regs->mode0) & ~XEMAC_MODE0_RXEN);
+
+    return 0;
+}
+
+
+/* Enable the transmitter (TXEN in mode0) and set the GNP bit in txMode0. */
+static inline int emac_tx_enable(EMAC* emac)
+{
+    XEMACRegs* regs = emac->regs;
+
+    out_be32(&regs->mode0, in_be32(&regs->mode0) | XEMAC_MODE0_TXEN);
+    out_be32(&regs->txMode0, in_be32(&regs->txMode0) | XEMAC_TX_MODE0_GNP);
+
+    return 0;
+}
+
+
+/* Disable the transmitter by clearing TXEN in mode0 (read-modify-write). */
+static inline int emac_tx_disable(EMAC* emac)
+{
+    XEMACRegs* regs = emac->regs;
+
+    out_be32(&regs->mode0, in_be32(&regs->mode0) & ~XEMAC_MODE0_TXEN);
+
+    return 0;
+}
+
+static inline int emac_irq_enable(EMAC* emac)
+{
+ out_be32(&emac->regs->interruptStatusEnable, XEMAC_IS_TXPE | XEMAC_IS_RXPE |
+ XEMAC_IS_TFEI | XEMAC_IS_RFFI | XEMAC_IS_OVR | XEMAC_IS_BDF |
+ XEMAC_IS_RTF | XEMAC_IS_LF | XEMAC_IS_BFCS | XEMAC_IS_FTL |
+ XEMAC_IS_ORE | XEMAC_IS_IRE | XEMAC_IS_DB | XEMAC_IS_TE);
+
+ return 0;
+}
+
+/* Mask every XEMAC interrupt source by clearing the enable register. */
+static inline int emac_irq_disable(EMAC* emac)
+{
+    out_be32(&emac->regs->interruptStatusEnable, 0U);
+
+    return 0;
+}
+
+/* Program the station (unicast) MAC address from netDev->dev_addr into the
+ * XEMAC individual-address registers: bytes 0-1 into the high register,
+ * bytes 2-5 into the low register.  Only EMAC_TYPE_XEMAC is handled; other
+ * types fall through and return 0 unchanged. */
+static inline int emac_set_mac_address(EMAC* emac)
+{
+    int rc = 0;
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac);
+
+    switch (emac->type) {
+        case EMAC_TYPE_XEMAC: {
+            XEMACRegs* reg = (XEMACRegs*) emac->regs;
+            struct net_device* netDev = emac->netDev;
+
+            out_be32(&reg->individualAddrH, netDev->dev_addr[0] << 8 |
+                     netDev->dev_addr[1]);
+            out_be32(&reg->individualAddrL, netDev->dev_addr[2] << 24 |
+                     netDev->dev_addr[3] << 16 | netDev->dev_addr[4] << 8 |
+                     netDev->dev_addr[5]);
+            break;
+        }
+    }
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+/* Rebuild the XEMAC group-address hash table from the netdev multicast list.
+ * Each address is folded through ether_crc(); the top 6 CRC bits select one
+ * of 64 hash bits spread across four 16-bit registers. */
+static inline int emac_set_multicast_list(EMAC* emac)
+{
+    int rc = 0;
+    XEMACRegs* regs = (XEMACRegs*) emac->regs;
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "entry - emac=%p\n", emac);
+
+    if (emac->netDev->flags & IFF_MULTICAST &&
+        emac->netDev->mc_count > 0) {
+        U16 groupAddrHashTable[4] = {0, 0, 0, 0};
+        struct dev_mc_list* dmi;
+        int i;
+
+        for (dmi = emac->netDev->mc_list; dmi; dmi = dmi->next) {
+            U32 crc = ether_crc(6, (char*) dmi->dmi_addr);
+            U32 bit = 63 - (crc >> 26);
+
+            groupAddrHashTable[bit >> 4] |=
+                0x8000 >> (bit & 0x0f);
+        }
+        /* Use out_be32 like every other register access in this driver;
+         * the original plain stores bypassed the MMIO accessor's ordering
+         * guarantees. */
+        for (i = 0; i < 4; i++)
+            out_be32(&regs->groupAddrHashTable[i], groupAddrHashTable[i]);
+    }
+
+    PRINTK(DBG_EMAC | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+void emac_exit(EMAC* emac);
+
+
+
+
+#endif
diff --git a/drivers/net/bgp_e10000/bgp_tomal.c b/drivers/net/bgp_e10000/bgp_tomal.c
new file mode 100644
index 00000000000000..4878c8ffb92f07
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_tomal.c
@@ -0,0 +1,1892 @@
+/*
+ * bgp_tomal.c: TOMAL device for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify i
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+
+
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+
+#include <asm/bluegene_ras.h>
+#include <asm/udbg.h>
+
+#include "bgp_e10000.h"
+#include "bgp_tomal.h"
+#include "bgp_emac.h"
+
+
+static RxDescSegment* tomal_alloc_rx_segment(U32 numDescriptors);
+static void tomal_free_rx_segment(RxDescSegment* segment);
+static TxDescSegment* tomal_alloc_tx_segment(U32 numDescriptors);
+static void tomal_free_tx_segment(TxDescSegment* segment);
+static irqreturn_t tomal_irq0(int irq, void* data);
+static irqreturn_t tomal_irq1(int irq, void* data);
+
+
+/* TOMAL hardware accessible through /proc/driver/e10000/tomal/hw */
+/* Each row maps a /proc file name to a register offset within the TOMAL MMIO
+ * block; the third field caches the proc_dir_entry created at registration.
+ * The table is terminated by a NULL name.
+ * NOTE(review): offsets 0x1900, 0x1910 and 0x1920 each appear twice
+ * (softwareNonCriticalErrors*1 vs. the *DataBuffer* entries) - verify the
+ * buffer-space entries against the TOMAL register map. */
+static E10000_PROC_ENTRY tomal_hw_proc_entry[] = {
+    { "configurationCtrl",                  (void*) 0x0000, NULL },
+    { "revisionID",                         (void*) 0x0060, NULL },
+    { "packetDataEngineCtrl",               (void*) 0x0400, NULL },
+    { "txNotificationCtrl",                 (void*) 0x0600, NULL },
+    { "txMinTimer",                         (void*) 0x0610, NULL },
+    { "txMaxTimer",                         (void*) 0x0620, NULL },
+    { "txMaxFrameNum0",                     (void*) 0x06c0, NULL },
+    { "txMaxFrameNum1",                     (void*) 0x07c0, NULL },
+    { "txMinFrameNum0",                     (void*) 0x06d0, NULL },
+    { "txMinFrameNum1",                     (void*) 0x07d0, NULL },
+    { "txFramePerServiceCtrl",              (void*) 0x0650, NULL },
+    { "txHWCurrentDescriptorAddrH0",        (void*) 0x0660, NULL },
+    { "txHWCurrentDescriptorAddrH1",        (void*) 0x0760, NULL },
+    { "txHWCurrentDescriptorAddrL0",        (void*) 0x0670, NULL },
+    { "txHWCurrentDescriptorAddrL1",        (void*) 0x0770, NULL },
+    { "txPendingFrameCount0",               (void*) 0x0690, NULL },
+    { "txPendingFrameCount1",               (void*) 0x0790, NULL },
+    { "txAddPostedFrames0",                 (void*) 0x06a0, NULL },
+    { "txAddPostedFrames1",                 (void*) 0x07a0, NULL },
+    { "txNumberOfTransmittedFrames0",       (void*) 0x06b0, NULL },
+    { "txNumberOfTransmittedFrames1",       (void*) 0x07b0, NULL },
+    { "txEventStatus0",                     (void*) 0x06e0, NULL },
+    { "txEventStatus1",                     (void*) 0x07e0, NULL },
+    { "txEventMask0",                       (void*) 0x06f0, NULL },
+    { "txEventMask1",                       (void*) 0x07f0, NULL },
+    { "rxNotificationCtrl",                 (void*) 0x0f00, NULL },
+    { "rxMinTimer",                         (void*) 0x0f10, NULL },
+    { "rxMaxTimer",                         (void*) 0x0f20, NULL },
+    { "rxMaxFrameNum0",                     (void*) 0x1080, NULL },
+    { "rxMaxFrameNum1",                     (void*) 0x1180, NULL },
+    { "rxMinFrameNum0",                     (void*) 0x1090, NULL },
+    { "rxMinFrameNum1",                     (void*) 0x1190, NULL },
+    { "rxHWCurrentDescriptorAddrH0",        (void*) 0x1020, NULL },
+    { "rxHWCurrentDescriptorAddrH1",        (void*) 0x1120, NULL },
+    { "rxHWCurrentDescriptorAddrL0",        (void*) 0x1030, NULL },
+    { "rxHWCurrentDescriptorAddrL1",        (void*) 0x1130, NULL },
+    { "rxAddFreeBytes0",                    (void*) 0x1040, NULL },
+    { "rxAddFreeBytes1",                    (void*) 0x1140, NULL },
+    { "rxTotalBuffersSize0",                (void*) 0x1050, NULL },
+    { "rxTotalBuffersSize1",                (void*) 0x1150, NULL },
+    { "rxNumberOfReceivedFrames0",          (void*) 0x1060, NULL },
+    { "rxNumberOfReceivedFrames1",          (void*) 0x1160, NULL },
+    { "rxDroppedFramesCount0",              (void*) 0x1070, NULL },
+    { "rxDroppedFramesCount1",              (void*) 0x1170, NULL },
+    { "rxEventStatus0",                     (void*) 0x10a0, NULL },
+    { "rxEventStatus1",                     (void*) 0x11a0, NULL },
+    { "rxEventMask0",                       (void*) 0x10b0, NULL },
+    { "rxEventMask1",                       (void*) 0x11b0, NULL },
+    { "softwareNonCriticalErrorsStatus0",   (void*) 0x1800, NULL },
+    { "softwareNonCriticalErrorsStatus1",   (void*) 0x1900, NULL },
+    { "softwareNonCriticalErrorsEnable0",   (void*) 0x1810, NULL },
+    { "softwareNonCriticalErrorsEnable1",   (void*) 0x1910, NULL },
+    { "softwareNonCriticalErrorsMask0",     (void*) 0x1820, NULL },
+    { "softwareNonCriticalErrorsMask1",     (void*) 0x1920, NULL },
+    { "receiveDataBufferSpace",             (void*) 0x1900, NULL },
+    { "transmitDataBuffer0FreeSpace",       (void*) 0x1910, NULL },
+    { "transmitDataBuffer1FreeSpace",       (void*) 0x1920, NULL },
+    { "rxMACStatus0",                       (void*) 0x1b20, NULL },
+    { "rxMACStatus1",                       (void*) 0x1c20, NULL },
+    { "rxMACStatusEnable0",                 (void*) 0x1b30, NULL },
+    { "rxMACStatusEnable1",                 (void*) 0x1c30, NULL },
+    { "rxMACStatusMask0",                   (void*) 0x1b40, NULL },
+    { "rxMACStatusMask1",                   (void*) 0x1c40, NULL },
+    { "txMACStatus0",                       (void*) 0x1b50, NULL },
+    { "txMACStatus1",                       (void*) 0x1c50, NULL },
+    { "txMACStatusEnable0",                 (void*) 0x1b60, NULL },
+    { "txMACStatusEnable1",                 (void*) 0x1c60, NULL },
+    { "txMACStatusMask0",                   (void*) 0x1b70, NULL },
+    { "txMACStatusMask1",                   (void*) 0x1c70, NULL },
+    { "hardwareErrorsStatus",               (void*) 0x1e00, NULL },
+    { "hardwareErrorsEnable",               (void*) 0x1e10, NULL },
+    { "hardwareErrorsMask",                 (void*) 0x1e20, NULL },
+    { "softwareCriticalErrorsStatus",       (void*) 0x1f00, NULL },
+    { "softwareCriticalErrorsEnable",       (void*) 0x1f10, NULL },
+    { "softwareCriticalErrorsMask",         (void*) 0x1f20, NULL },
+    { "receiveDescriptorBadCodeFEC",        (void*) 0x1f30, NULL },
+    { "transmitDescriptorBadCodeFEC",       (void*) 0x1f40, NULL },
+    { "interruptStatus",                    (void*) 0x1f80, NULL },
+    { "interruptRoute",                     (void*) 0x1f90, NULL },
+    { "rxMACBadStatusCounter0",             (void*) 0x2060, NULL },
+    { "rxMACBadStatusCounter1",             (void*) 0x2160, NULL },
+    { "debugVectorsCtrl",                   (void*) 0x3000, NULL },
+    { "debugVectorsReadData",               (void*) 0x3010, NULL },
+    { NULL,                                 (void*) 0,      NULL }
+};
+
+
+/* TOMAL software accessible through /proc/driver/e10000/tomal/sw */
+/* Software (driver-state) statistics exposed under
+ * /proc/driver/e10000/tomal/sw.  Unlike the hw table, these rows carry no
+ * register offset - the data pointer is filled in at registration time.
+ * NULL name terminates the table. */
+static E10000_PROC_ENTRY tomal_sw_proc_entry[] = {
+    { "rxMaxBuffers0",                      NULL, NULL },
+    { "rxMaxBuffers1",                      NULL, NULL },
+    { "rxBufferSize0",                      NULL, NULL },
+    { "rxBufferSize1",                      NULL, NULL },
+    { "rxDescSegmentAddr0",                 NULL, NULL },
+    { "rxDescSegmentAddr1",                 NULL, NULL },
+    { "rxOldDescSegmentAddr0",              NULL, NULL },
+    { "rxOldDescSegmentAddr1",              NULL, NULL },
+    { "txMaxBuffers0",                      NULL, NULL },
+    { "txMaxBuffers1",                      NULL, NULL },
+    { "txPendingBuffers0",                  NULL, NULL },
+    { "txPendingBuffers1",                  NULL, NULL },
+    { "txNumberOfTransmittedFrames0",       NULL, NULL },
+    { "txNumberOfTransmittedFrames1",       NULL, NULL },
+    { "txDescSegmentAddr0",                 NULL, NULL },
+    { "txDescSegmentAddr1",                 NULL, NULL },
+    { "txOldDescSegmentAddr0",              NULL, NULL },
+    { "txOldDescSegmentAddr1",              NULL, NULL },
+    { "txFreeDescSegmentAddr0",             NULL, NULL },
+    { "txFreeDescSegmentAddr1",             NULL, NULL },
+    { "irq0",                               NULL, NULL },
+    { "irq1",                               NULL, NULL },
+    { "numberOfNetrxDrops",                 NULL, NULL },
+    { "numberOfHwDrops0",                   NULL, NULL },
+    { "numberOfHwDrops1",                   NULL, NULL },
+    { "numberOfNotLast",                    NULL, NULL },
+/*  { "txChecksumNONE",                     NULL, NULL }, */
+/*  { "txChecksumPARTIAL",                  NULL, NULL }, */
+/*  { "txChecksumUNNECESSARY",              NULL, NULL }, */
+/*  { "txChecksumCOMPLETE",                 NULL, NULL }, */
+    { NULL,                                 NULL, NULL }
+};
+
+
+/* Allocate a single Rx descriptor segment with the specified number of descriptors. */
+/* The segment holds: DMA-coherent descriptor storage terminated by a branch */
+/* descriptor, plus a parallel array of sk_buff pointers.  The segment's next */
+/* pointer initially refers to itself; the caller links segments into a ring. */
+/* Returns NULL if any of the three allocations fails (partial allocations */
+/* are rolled back). */
+static RxDescSegment* tomal_alloc_rx_segment(U32 numDescriptors)
+{
+    RxDescSegment* segment = NULL;
+    RxDesc* desc;
+    size_t size = numDescriptors * sizeof(RxDesc) + sizeof(BranchDesc);
+    dma_addr_t dmaHandle;
+
+    /* Allocate descriptor storage. */
+    desc = (RxDesc*) dma_alloc_coherent(NULL, size, &dmaHandle, GFP_KERNEL);
+    if (desc) {
+        /* Clear the descriptors. */
+        memset((void*) desc, 0, size);
+
+        /* Allocate a segment. */
+        segment = kmalloc(sizeof(RxDescSegment), GFP_KERNEL);
+        if (segment) {
+            segment->size = size;
+            segment->dmaHandle = dmaHandle;
+            segment->desc = desc;
+
+            /* The branch descriptor lives directly after the last RxDesc */
+            /* and will later chain this segment to the next one. */
+            segment->branchDesc = (BranchDesc*) &desc[numDescriptors];
+            segment->branchDesc->code = TOMAL_BRANCH_CODE;
+            segment->branchDesc->reserved = segment->branchDesc->nextDescAddrH = 0;
+            segment->branchDesc->nextDescAddrL = (U32) NULL;
+
+            /* Allocate storage for buffer pointers. */
+            segment->skb = (struct sk_buff**)
+                kmalloc(numDescriptors * sizeof(struct sk_buff*) +
+                        sizeof(struct sk_buff*), GFP_KERNEL);
+            if (!segment->skb) {
+                kfree((void*) segment);
+                segment = NULL;
+                dma_free_coherent(NULL, size, (void*) desc, dmaHandle);
+            } else {
+                memset((void*) segment->skb, 0,
+                       numDescriptors * sizeof(struct sk_buff*) + sizeof(struct sk_buff*));
+                segment->currDesc = segment->desc;
+                segment->currSkb = segment->skb;
+                segment->next = segment;
+            }
+        } else
+            dma_free_coherent(NULL, size, (void*) desc, dmaHandle);
+    }
+
+    return segment;
+}
+
+
+/* Allocate descriptor segment(s) until the specified number of Rx descriptors */
+/* have been created, halving the per-segment request on allocation failure. */
+/* On success the segments form a ring, the TOMAL RX descriptor registers are */
+/* programmed and 0 is returned; on failure everything is rolled back and */
+/* -ENOMEM is returned (the registers are NOT touched - the original code */
+/* dereferenced the NULLed rxDescSegment[channel] on this path). */
+int tomal_alloc_rx_segments(TOMAL* tomal,
+                            U8 channel,
+                            U32 totalDescriptors)
+{
+    RxDescSegment* firstSegment = (RxDescSegment*) NULL;
+    RxDescSegment* prevSegment = (RxDescSegment*) NULL;
+    RxDescSegment* segment = (RxDescSegment*) NULL;
+    U32 numDescriptors = totalDescriptors;
+    U8 first = 1;
+    int rc;
+
+    /* Allocate RX segments until the indicated number of descriptors have been */
+    /* created. */
+    while (totalDescriptors && numDescriptors >= 1) {
+        /* Allocate an RX descriptor segment. */
+        segment = tomal_alloc_rx_segment(numDescriptors);
+        if (segment) {
+            /* If this was the first segment then remember it. */
+            if (first) {
+                firstSegment = prevSegment = segment;
+                first = 0;
+            }
+
+            /* Link the previous segment to the new segment, then advance */
+            /* prevSegment (the original left it stuck on the first segment, */
+            /* so intermediate segments were never chained). */
+            prevSegment->branchDesc->nextDescAddrL = (U32) segment->dmaHandle;
+            prevSegment->next = segment;
+            prevSegment = segment;
+
+            totalDescriptors -= numDescriptors;
+        } else {
+            /* Failure allocating a segment of the requested size. Reduce the size. */
+            numDescriptors /= 2;
+        }
+    }
+
+    /* All segments created? */
+    if (!segment) {
+        RxDescSegment* nextSegment = NULL;
+
+        /* Free any segments that were allocated.  The most recently linked */
+        /* segment still points at itself, so terminate there rather than */
+        /* tripping a BUG_ON as the original did. */
+        segment = firstSegment;
+        while (segment) {
+            nextSegment = segment->next;
+            tomal_free_rx_segment(segment);
+            if (nextSegment == segment)
+                break;
+            segment = nextSegment;
+        }
+        tomal->rxDescSegment[channel] = (RxDescSegment*) NULL;
+
+        e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error,
+                      "Failure allocating RX descriptor segment - totalDescriptors=%d.",
+                      totalDescriptors);
+        rc = -ENOMEM;
+    } else {
+        /* Link the last segment to the first, closing the ring. */
+        segment->branchDesc->nextDescAddrL = (U32) firstSegment->dmaHandle;
+        segment->next = firstSegment;
+
+        tomal->rxDescSegment[channel] = segment;
+
+        /* Update TOMAL's view of the RX descriptors (success path only). */
+        out_be32(&tomal->regs[channel]->rxHWCurrentDescriptorAddrH, 0);
+        out_be32(&tomal->regs[channel]->rxHWCurrentDescriptorAddrL,
+                 (U32) tomal->rxDescSegment[channel]->dmaHandle);
+
+        tomal->oldRxSegment[channel] = tomal->rxDescSegment[channel];
+        tomal->oldRxSegment[channel]->currDesc = tomal->oldRxSegment[channel]->desc;
+        tomal->oldRxSegment[channel]->currSkb = tomal->oldRxSegment[channel]->skb;
+        rc = 0;
+    }
+
+    return rc;
+}
+
+
+/* Free the specified Rx descriptor segment: unmap and release any skbs still */
+/* attached to its descriptors, then free the skb pointer array, the */
+/* DMA-coherent descriptor storage, and finally the segment itself. */
+static void tomal_free_rx_segment(RxDescSegment* segment)
+{
+    RxDesc* desc;
+    struct sk_buff** skb;
+
+    /* Look for any descriptors awaiting processing. */
+    for (desc = segment->desc, skb = segment->skb;
+         desc && desc != (RxDesc*) segment->branchDesc; desc++, skb++) {
+        if (*skb) {
+            /* RX buffers were mapped device-to-memory. */
+            dma_unmap_single(NULL, desc->buffHeadAddrL,
+                             desc->postedLength, DMA_FROM_DEVICE);
+            dev_kfree_skb_any(*skb);
+            *skb = NULL;
+        }
+
+        desc->postedLength = 0;
+    }
+
+    /* Free SKB pointer storage. */
+    if (segment->skb)
+        kfree(segment->skb);
+
+    /* Free the descriptor storage. */
+    if (segment->desc)
+        dma_free_coherent(NULL, segment->size, (void*) segment->desc, segment->dmaHandle);
+
+    /* Free the segment. */
+    kfree((void*) segment);
+
+    return;
+}
+
+
+/* Release every Rx descriptor segment in the channel's circular list. */
+void tomal_free_rx_segments(TOMAL* tomal,
+                            U8 channel)
+{
+    RxDescSegment* const ringStart = tomal->rxDescSegment[channel];
+    RxDescSegment* seg;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+    /* Walk the ring once, stopping when the successor wraps to the start. */
+    for (seg = ringStart; seg; ) {
+        RxDescSegment* following = seg->next;
+
+        tomal_free_rx_segment(seg);
+        seg = (following == ringStart) ? NULL : following;
+    }
+    tomal->rxDescSegment[channel] = NULL;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n");
+
+    return;
+}
+
+
+/* Free all Rx buffers.  Walks every segment in the channel's ring, unmapping */
+/* and freeing any posted skb, then zeroing the posted length so the */
+/* descriptor is seen as empty.  Finally forces TOMAL's 20-bit */
+/* rxTotalBufferSize register back to zero by adding bytes until it wraps. */
+int tomal_free_rx_buffers(TOMAL* tomal,
+                          U8 channel)
+{
+    int rc = 0;
+    RxDescSegment* segment = tomal->rxDescSegment[channel];
+    RxDescSegment* startSegment = segment;
+    RxDesc* desc;
+    struct sk_buff** skb;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+    while (segment) {
+        /* Look for any descriptors awaiting processing. */
+        for (desc = segment->desc, skb = segment->skb;
+             desc != (RxDesc*) segment->branchDesc; desc++, skb++) {
+            if (*skb) {
+                dma_unmap_single(NULL, desc->buffHeadAddrL,
+                                 desc->postedLength, DMA_FROM_DEVICE);
+                dev_kfree_skb_any(*skb);
+                *skb = NULL;
+            }
+
+            desc->postedLength = 0;
+        }
+
+        segment = segment->next;
+        if (segment == startSegment)
+            break;
+    }
+
+    /* Force TOMAL's total buffer size register back to zero. We do this by adding */
+    /* enough buffer space to make this 20 bit register wrap around.  Add in */
+    /* 0xffff chunks while the remaining distance exceeds one chunk, then add */
+    /* the exact remainder. */
+    while (in_be32(&tomal->regs[channel]->rxTotalBufferSize) &&
+           (0x00100000 - in_be32(&tomal->regs[channel]->rxTotalBufferSize)) > 0x0000ffff)
+        out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0xffff);
+    if (in_be32(&tomal->regs[channel]->rxTotalBufferSize))
+        out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0x00100000 - in_be32(&tomal->regs[channel]->rxTotalBufferSize));
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+/* Returns the number of RX buffers that are waiting to be processed. An error is indicated */
+/* by a negative value. The caller should be holding the TOMAL lock for the specified channel. */
+int tomal_pending_rx_buffers(TOMAL* tomal,
+                             U8 channel)
+{
+    int pending = 0;
+    RxDescSegment* const ringStart = tomal->rxDescSegment[channel];
+    RxDescSegment* seg = ringStart;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+    /* One full pass over the segment ring, counting completed frames. */
+    do {
+        RxDesc* d;
+
+        for (d = seg->desc; d != (RxDesc*) seg->branchDesc; d++) {
+            if ((d->status & TOMAL_RX_LAST) && d->totalFrameLength)
+                pending++;
+        }
+        seg = seg->next;
+    } while (seg != ringStart);
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", pending);
+
+    return pending;
+}
+
+
+/* Returns the number of TX buffers that are queued for transmission. An error is indicated */
+/* by a negative value. The caller should be holding the TOMAL TX lock for the specified channel. */
+int tomal_pending_tx_buffers(TOMAL* tomal,
+                             U8 channel)
+{
+    int queued = 0;
+    TxDescSegment* const ringStart = tomal->txDescSegment[channel];
+    TxDescSegment* seg = ringStart;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+    /* One full pass over the segment ring; a non-zero posted length marks */
+    /* a descriptor that still owns a buffer. */
+    do {
+        TxDesc* d;
+
+        for (d = seg->desc; d != (TxDesc*) seg->branchDesc; d++) {
+            if (d->postedLength)
+                queued++;
+        }
+        seg = seg->next;
+    } while (seg != ringStart);
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", queued);
+
+    return queued;
+}
+
+
+/* Allocate a Tx descriptor segment with the specified number of descriptors: */
+/* DMA-coherent descriptor storage terminated by a branch descriptor, plus a */
+/* parallel sk_buff pointer array.  Returns NULL on failure with all partial */
+/* allocations rolled back. */
+static TxDescSegment* tomal_alloc_tx_segment(U32 numDescriptors)
+{
+    TxDescSegment* segment = NULL;
+    TxDesc* desc;
+    size_t size = numDescriptors * sizeof(TxDesc) + sizeof(BranchDesc);
+    dma_addr_t dmaHandle;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - numDescriptors=%d\n", numDescriptors);
+
+    /* Allocate descriptor storage. */
+    desc = (TxDesc*) dma_alloc_coherent(NULL, size, &dmaHandle, GFP_KERNEL);
+    if (desc) {
+        /* Clear the descriptor storage. */
+        memset((void*) desc, 0, size);
+
+        /* Allocate a segment. */
+        segment = kmalloc(sizeof(TxDescSegment), GFP_KERNEL);
+        if (segment) {
+            segment->size = size;
+            segment->dmaHandle = dmaHandle;
+            segment->desc = desc;
+
+            segment->branchDesc = (BranchDesc*) &segment->desc[numDescriptors];
+            segment->branchDesc->code = TOMAL_BRANCH_CODE;
+            segment->branchDesc->reserved = segment->branchDesc->nextDescAddrH = 0;
+            segment->branchDesc->nextDescAddrL = (U32) NULL;
+
+            /* Allocate storage for buffer pointers. */
+            segment->skb = (struct sk_buff**)
+                kmalloc((numDescriptors+1) * sizeof(struct sk_buff*), GFP_KERNEL);
+            if (!segment->skb) {
+                kfree((void*) segment);
+                segment = NULL;
+                /* Free via the locals: the original used segment->desc and
+                 * segment->dmaHandle here AFTER kfree()ing segment and
+                 * setting it NULL - a use-after-free/NULL dereference.
+                 * (The Rx variant already does it this way.) */
+                dma_free_coherent(NULL, size, (void*) desc, dmaHandle);
+            } else {
+                memset((void*) segment->skb, 0,
+                       (numDescriptors+1) * sizeof(struct sk_buff*));
+                segment->oldIndex = segment->freeIndex = 0;
+                segment->next = segment; /* by default point this segment at itself */
+            }
+        } else
+            dma_free_coherent(NULL, size, (void*) desc, dmaHandle);
+    }
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - segment=%p\n", segment);
+
+    return segment;
+}
+
+
+/* Allocate Tx descriptor segment(s) until the specified number of descriptors */
+/* have been created, halving the per-segment request on allocation failure. */
+/* On success the segments form a ring, the TOMAL TX descriptor registers and */
+/* bookkeeping are initialized and 0 is returned; on failure everything is */
+/* rolled back and -ENOMEM is returned (the registers are NOT touched - the */
+/* original dereferenced the NULLed txDescSegment[channel] on this path). */
+int tomal_alloc_tx_segments(TOMAL* tomal,
+                            U8 channel,
+                            U32 totalDescriptors)
+{
+    TxDescSegment* firstSegment = (TxDescSegment*) NULL;
+    TxDescSegment* prevSegment = (TxDescSegment*) NULL;
+    TxDescSegment* segment = (TxDescSegment*) NULL;
+    U32 numDescriptors = totalDescriptors;
+    U8 first = 1;
+    int rc;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d, totalDescriptors=%d\n", tomal,
+           channel, totalDescriptors);
+
+    /* Allocate TX segments until the indicated number of descriptors have been */
+    /* created. */
+    while (totalDescriptors && numDescriptors >= 1) {
+        /* Allocate an TX descriptor segment. */
+        segment = tomal_alloc_tx_segment(numDescriptors);
+        if (segment) {
+            /* If this was the first segment then remember it. */
+            if (first) {
+                firstSegment = prevSegment = segment;
+                first = 0;
+            }
+
+            /* Link the previous segment to the new segment, then advance */
+            /* prevSegment (the original left it stuck on the first segment, */
+            /* so intermediate segments were never chained). */
+            prevSegment->branchDesc->nextDescAddrL = (U32) segment->dmaHandle;
+            prevSegment->next = segment;
+            prevSegment = segment;
+
+            totalDescriptors -= numDescriptors;
+        } else {
+            /* Failure allocating a segment of the requested size. Reduce the size. */
+            numDescriptors /= 2;
+        }
+    }
+
+    /* All segments created? */
+    if (!segment) {
+        TxDescSegment* nextSegment = NULL;
+
+        /* Free any segments that were allocated.  The most recently linked */
+        /* segment still points at itself, so terminate there rather than */
+        /* tripping a BUG_ON as the original did. */
+        segment = firstSegment;
+        while (segment) {
+            nextSegment = segment->next;
+            tomal_free_tx_segment(segment);
+            if (nextSegment == segment)
+                break;
+            segment = nextSegment;
+        }
+        tomal->txDescSegment[channel] = (TxDescSegment*) NULL;
+
+        e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error,
+                      "TX descriptor allocation failure - totalDescriptors=%d.",
+                      totalDescriptors);
+        rc = -ENOMEM;
+    } else {
+        /* Link the last segment to the first, closing the ring. */
+        segment->branchDesc->nextDescAddrL = (U32) firstSegment->dmaHandle;
+        segment->next = firstSegment;
+
+        tomal->txDescSegment[channel] = segment;
+
+        /* Tell TOMAL where the descriptor storage is (success path only). */
+        out_be32(&tomal->regs[channel]->txHWCurrentDescriptorAddrH, 0);
+        out_be32(&tomal->regs[channel]->txHWCurrentDescriptorAddrL,
+                 (U32) tomal->txDescSegment[channel]->dmaHandle);
+        tomal->pendingTxBuffers[channel] = 0;
+        tomal->oldTxSegment[channel] = tomal->freeTxSegment[channel] = tomal->txDescSegment[channel];
+        /* (The original assigned oldIndex twice in one chained statement.) */
+        tomal->freeTxSegment[channel]->freeIndex = 0;
+        tomal->freeTxSegment[channel]->oldIndex = 0;
+        tomal->numberOfTransmittedFrames[channel] = 0;
+        rc = 0;
+    }
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+    return rc;
+}
+
+
+/* Release every Tx descriptor segment in the channel's circular list. */
+void tomal_free_tx_segments(TOMAL* tomal,
+                            U8 channel)
+{
+    TxDescSegment* const ringStart = tomal->txDescSegment[channel];
+    TxDescSegment* seg;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+    /* Walk the ring once, stopping when the successor wraps to the start. */
+    for (seg = ringStart; seg; ) {
+        TxDescSegment* following = seg->next;
+
+        tomal_free_tx_segment(seg);
+        seg = (following == ringStart) ? NULL : following;
+    }
+    tomal->txDescSegment[channel] = NULL;
+
+    PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n");
+
+    return;
+}
+
+
+/* Free the specified Tx segment: unmap and release any skbs still attached, */
+/* then free the skb pointer array, the DMA-coherent descriptor storage and */
+/* the segment itself. */
+void tomal_free_tx_segment(TxDescSegment* segment)
+{
+    TxDesc* desc;
+    struct sk_buff** skb;
+
+    /* Look for any descriptors with an associated buffer. */
+    for (desc = segment->desc, skb = segment->skb;
+         desc && desc != (TxDesc*) segment->branchDesc; desc++, skb++) {
+        if (*skb) {
+            /* TX buffers are mapped towards the device, so unmap with
+             * DMA_TO_DEVICE (matching tomal_process_tx_buffers); the
+             * original wrongly used DMA_FROM_DEVICE here. */
+            dma_unmap_single(NULL, desc->buffHeadAddrL,
+                             desc->postedLength, DMA_TO_DEVICE);
+            dev_kfree_skb_any(*skb);
+            *skb = NULL;
+        }
+        desc->postedLength = 0;
+    }
+
+    /* Free SKB pointer storage (kfree(NULL) is a no-op). */
+    kfree(segment->skb);
+
+    /* Free the descriptor storage. */
+    if (segment->desc)
+        dma_free_coherent(NULL, segment->size, (void*) segment->desc, segment->dmaHandle);
+
+    /* Free the segment. */
+    kfree((void*) segment);
+
+    return;
+}
+
+
+
+/* Free all Tx buffers: walk every segment in the channel's ring, unmapping */
+/* and freeing any skb still attached, and clear each posted length. */
+void tomal_free_tx_buffers(TOMAL* tomal,
+                           U8 channel)
+{
+    TxDescSegment* segment = tomal->txDescSegment[channel];
+    TxDescSegment* startSegment = segment;
+    TxDesc* desc;
+    struct sk_buff** skb;
+
+    while (segment) {
+        /* Look for any descriptors with an associated buffer. */
+        for (desc = segment->desc, skb = segment->skb;
+             desc != (TxDesc*) segment->branchDesc; desc++, skb++) {
+            if (*skb) {
+                /* TX buffers are mapped towards the device, so unmap with
+                 * DMA_TO_DEVICE (matching tomal_process_tx_buffers); the
+                 * original wrongly used DMA_FROM_DEVICE here. */
+                dma_unmap_single(NULL, desc->buffHeadAddrL,
+                                 desc->postedLength, DMA_TO_DEVICE);
+                dev_kfree_skb_any(*skb);
+                *skb = NULL;
+            }
+
+            desc->postedLength = 0;
+        }
+
+        segment = segment->next;
+        if (segment == startSegment)
+            break;
+    }
+
+    return;
+}
+
+
+
+/* Reclaim completed Tx descriptors for a channel. */
+/* Walks the descriptor ring from the oldest unserved entry, unmapping each */
+/* buffer and freeing the skb on the frame's last descriptor. Returns the */
+/* number of descriptors reclaimed, and restarts the Tx notification counter */
+/* and wakes the netif queue if it was flow-controlled. Called from the IRQ */
+/* handlers with the channel's txLock held. */
+int tomal_process_tx_buffers(TOMAL* tomal,
+ U8 channel,
+ register U32 framesToProcess)
+{
+ register TxDescSegment* segment = tomal->oldTxSegment[channel];
+ register TxDesc* desc = &segment->desc[segment->oldIndex];
+ register int skbFrag = 0;
+ register int rc = 0;
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, channel=%d\n", tomal, channel);
+
+ /* Process the non-served descriptors, starting with the oldest. */
+ tomal->numberOfTransmittedFrames[channel] += framesToProcess;
+ while (likely(framesToProcess)) {
+ /* Have we reached the end of the segment? */
+ if (unlikely(desc == (TxDesc*) segment->branchDesc)) {
+ /* Reset the oldest descriptor pointer and move the oldest segment ahead. */
+ segment->oldIndex = 0;
+ tomal->oldTxSegment[channel] = segment = segment->next;
+ desc = segment->desc;
+ }
+
+ /* Process the current descriptor. */
+ PRINTK(DBG_TOMAL | DBG_LEVEL3, "xmit of buffer [%x] complete\n",
+ desc->buffHeadAddrL);
+
+ if (likely(desc->code & TOMAL_TX_LAST)) {
+ /* Unmap the buffer. Free the skb. Check descriptor status. Increment the */
+ /* transmitted frame count. */
+ /* The skb pointer is stored only at the frame's last descriptor slot. */
+ dma_unmap_single(NULL, desc->buffHeadAddrL, desc->postedLength, DMA_TO_DEVICE);
+ dev_kfree_skb_irq(segment->skb[segment->oldIndex]);
+ segment->skb[segment->oldIndex] = NULL;
+ skbFrag = 0;
+ framesToProcess--;
+ if (unlikely(!(desc->wBStatus & TOMAL_TX_STATUS_GOOD)))
+ ((EMAC*) netdev_priv(tomal->netDev[channel]))->stats.tx_errors++;
+ } else
+ /* We have a fragmented skb and the first buffer is a special */
+ /* case because we didn't map an entire page for it. Unmap */
+ /* the buffer now. */
+ if (!skbFrag) {
+ dma_unmap_single(NULL, desc->buffHeadAddrL,
+ desc->postedLength, DMA_TO_DEVICE);
+ skbFrag = 1;
+ } else
+ /* Unmap the page that contains the current fragment. */
+ dma_unmap_page(NULL, desc->buffHeadAddrL,
+ desc->postedLength, DMA_TO_DEVICE);
+
+ /* Advance to next descriptor. */
+ desc++;
+ segment->oldIndex++;
+ rc++;
+ }
+
+ tomal->pendingTxBuffers[channel] -= rc;
+
+ /* Restart the TX counters. */
+ /* txNotificationCtrl lives in the shared (channel 0) register block. */
+ out_be32(&tomal->regs[0]->txNotificationCtrl, (channel ? TOMAL_TX_NOTIFY_CTRL_COUNTER_START1 : TOMAL_TX_NOTIFY_CTRL_COUNTER_START0));
+
+ /* Wake the queue once enough descriptors are free for a max-frag skb. */
+ if (unlikely(netif_queue_stopped(tomal->netDev[channel]) &&
+ (tomal->pendingTxBuffers[channel] + MAX_SKB_FRAGS + 1) < tomal->maxTxBuffers[channel]))
+ netif_wake_queue(tomal->netDev[channel]);
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+ return rc;
+}
+
+
+
+/* Disable IRQs. */
+/* Masks and disables all TOMAL event/interrupt sources for one channel */
+/* by writing zeros to the per-channel enable and mask registers. */
+void tomal_irq_disable(TOMAL* tomal,
+ U8 channel)
+{
+ /* Disable TX & RX MAC event and interrupt generation. */
+ out_be32(&tomal->regs[channel]->rxMACStatusEnable, 0);
+ out_be32(&tomal->regs[channel]->txMACStatusEnable, 0);
+ /* NOTE(review): txMACStatusEnable is written twice here and the RX */
+ /* status mask is never cleared; the duplicate write below may have */
+ /* been intended for rxMACStatusMask - confirm against the TOMAL */
+ /* register map (tomal_irq_enable shows the same pattern). */
+ out_be32(&tomal->regs[channel]->txMACStatusEnable, 0);
+ out_be32(&tomal->regs[channel]->txMACStatusMask, 0);
+
+ /* Disable HW error event and interrupt generation. */
+ out_be32(&tomal->regs[channel]->hwErrorsEnable, 0);
+ out_be32(&tomal->regs[channel]->hwErrorsMask, 0);
+
+ /* Disable SW critical and non-critical error event and */
+ /* interrupt generation. */
+ out_be32(&tomal->regs[channel]->swCriticalErrorsEnable, 0);
+ out_be32(&tomal->regs[channel]->swCriticalErrorsMask, 0);
+ out_be32(&tomal->regs[channel]->swNonCriticalErrorsEnable, 0);
+ out_be32(&tomal->regs[channel]->swNonCriticalErrorsMask, 0);
+
+ /* Disable TX & RX event interrupts. */
+ out_be32(&tomal->regs[channel]->rxEventMask, 0);
+ out_be32(&tomal->regs[channel]->txEventMask, 0);
+
+ return;
+}
+
+
+/* Enable IRQs and interrupt generation mechanisms. */
+/* Arms the per-channel MAC/HW/SW error sources, the TX/RX event */
+/* interrupts, and kick-starts the shared TX/RX notification counters. */
+void tomal_irq_enable(TOMAL* tomal,
+ U8 channel)
+{
+ /* Enable TX & RX MAC event and interrupt generation. */
+ out_be32(&tomal->regs[channel]->rxMACStatusEnable, TOMAL_RX_MAC_XEMAC_MASK);
+ out_be32(&tomal->regs[channel]->txMACStatusEnable, TOMAL_TX_MAC_XEMAC_MASK);
+ /* NOTE(review): duplicate txMACStatusEnable write, mirroring */
+ /* tomal_irq_disable; rxMACStatusMask is never set. One of these */
+ /* writes may have been intended for rxMACStatusMask - confirm */
+ /* against the TOMAL register map. */
+ out_be32(&tomal->regs[channel]->txMACStatusEnable, TOMAL_TX_MAC_XEMAC_MASK);
+ out_be32(&tomal->regs[channel]->txMACStatusMask, TOMAL_TX_MAC_XEMAC_MASK);
+
+ /* Enable HW error event and interrupt generation. */
+ out_be32(&tomal->regs[channel]->hwErrorsEnable,
+ TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_ORAPE |
+ TOMAL_HW_ERRORS_IDBPE | TOMAL_HW_ERRORS_ODBPE);
+ out_be32(&tomal->regs[channel]->hwErrorsMask,
+ TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_ORAPE |
+ TOMAL_HW_ERRORS_IDBPE | TOMAL_HW_ERRORS_ODBPE);
+
+ /* Enable SW critical and non-critical error event and */
+ /* interrupt generation. */
+ out_be32(&tomal->regs[channel]->swCriticalErrorsEnable,
+ TOMAL_SW_CRIT_ERRORS_TDBC | TOMAL_SW_CRIT_ERRORS_RDBC);
+ out_be32(&tomal->regs[channel]->swCriticalErrorsMask,
+ TOMAL_SW_CRIT_ERRORS_TDBC | TOMAL_SW_CRIT_ERRORS_RDBC);
+ out_be32(&tomal->regs[channel]->swNonCriticalErrorsEnable,
+ TOMAL_SW_NONCRIT_ERRORS_TPDBC | TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+ out_be32(&tomal->regs[channel]->swNonCriticalErrorsMask,
+ TOMAL_SW_NONCRIT_ERRORS_TPDBC | TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+
+ /* Enable TX & RX event interrupts. */
+ out_be32(&tomal->regs[channel]->rxEventMask, TOMAL_RX_EVENT);
+ out_be32(&tomal->regs[channel]->txEventMask, TOMAL_TX_EVENT);
+
+ /* Enable TX counters. */
+ /* The notification control registers live in the shared (regs[0]) block. */
+ out_be32(&tomal->regs[0]->txNotificationCtrl,
+ (channel ? TOMAL_TX_NOTIFY_CTRL_COUNTER_START1 :
+ TOMAL_TX_NOTIFY_CTRL_COUNTER_START0));
+
+ /* Enable RX counters. */
+ out_be32(&tomal->regs[0]->rxNotificationCtrl,
+ (channel ? TOMAL_RX_NOTIFY_CTRL_COUNTER_START1 :
+ TOMAL_RX_NOTIFY_CTRL_COUNTER_START0));
+
+ return;
+}
+
+
+/* Handle IRQs for channel 0 and any IRQs not specific to any channel. */
+/* Reads the shared interrupt status register and services RX, TX, MAC */
+/* error, SW non-critical error and critical error conditions. Under */
+/* CONFIG_BGP_E10000_NAPI, RX work is deferred to tomal_poll_napi; */
+/* otherwise RX buffers are processed inline via tomal_poll. */
+static irqreturn_t tomal_irq0(int irq,
+ void* data)
+{
+ int rc = IRQ_NONE;
+ TOMAL* tomal = (TOMAL*) data;
+ EMAC* emac = (EMAC*) netdev_priv(tomal->netDev[0]);
+ U32 isr = in_be32(&tomal->regs[0]->interruptStatus);
+#ifdef CONFIG_BGP_E10000_NAPI
+ int pollScheduled = 0;
+#endif
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - irq=%d isr=%08x\n", irq, isr);
+
+ if (likely(irq == tomal->irq0)) {
+ if (isr & TOMAL_INTERRUPT_RX0) {
+#ifndef CONFIG_BGP_E10000_NAPI
+ int budget = tomal->maxRxBuffers[0];
+#endif
+ PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_RX0 - irq=%d isr=%08x\n", irq, isr);
+ spin_lock(&tomal->rxLock[0]);
+#ifdef CONFIG_BGP_E10000_NAPI
+ /* Disable further Rx interrupts. */
+ out_be32(&tomal->regs[0]->rxEventMask, 0);
+
+ /* Schedule Rx processing. */
+ napi_schedule(&(tomal->napi[0])) ;
+ pollScheduled = 1;
+#endif
+
+ /* Clear the RX interrupt. */
+ out_be32(&tomal->regs[0]->rxEventStatus, TOMAL_RX_EVENT);
+
+#ifndef CONFIG_BGP_E10000_NAPI
+ /* Process the buffers then allocate new ones. */
+ rc = tomal_poll(tomal->netDev[0], budget);
+ if (rc != 0)
+ printk(KERN_CRIT "Failure processing RX buffers [%d]\n", rc);
+#endif
+ spin_unlock(&tomal->rxLock[0]);
+ PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_RX0 - IRQ_HANDLED\n");
+ rc = IRQ_HANDLED;
+ }
+ if (isr & TOMAL_INTERRUPT_TX0) {
+ spin_lock(&tomal->txLock[0]);
+
+ /* Clear any TX interrupt. */
+ out_be32(&tomal->regs[0]->txEventStatus, TOMAL_TX_EVENT);
+
+ /* Process the buffers that have been transmitted. */
+ /* framesToProcess = device frame counter minus our cached count. */
+ rc = tomal_process_tx_buffers(tomal, 0,
+ in_be32(&tomal->regs[0]->txNumberOfTransmittedFrames)-tomal->numberOfTransmittedFrames[0]);
+ if (rc <0)
+ printk(KERN_CRIT "Failure processing TX buffers [%d]\n", rc);
+
+ spin_unlock(&tomal->txLock[0]);
+ rc = IRQ_HANDLED;
+ }
+ if (isr & TOMAL_INTERRUPT_TX_MAC_ERROR0) {
+ U32 status = in_be32(&tomal->regs[0]->txMACStatus);
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_TX_MAC_ERROR0 [%08x]\n", status);
+
+ emac->stats.tx_errors++;
+
+ /* Clear the interrupt. */
+ out_be32(&tomal->regs[0]->txMACStatus, status);
+ rc = IRQ_HANDLED;
+ }
+ if (isr & TOMAL_INTERRUPT_RX_MAC_ERROR0) {
+ U32 status = in_be32(&tomal->regs[0]->rxMACStatus);
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_RX_MAC_ERROR0 [%08x]\n", status);
+
+ emac->stats.rx_errors++;
+
+ /* Clear the interrupt. */
+ out_be32(&tomal->regs[0]->rxMACStatus, status);
+ rc = IRQ_HANDLED;
+ }
+ if (isr & TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0) {
+ U32 status = in_be32(&tomal->regs[0]->swNonCriticalErrorsStatus);
+#ifndef CONFIG_BGP_E10000_NAPI
+ int budget = tomal->maxRxBuffers[0];
+#else
+ U32 swNonCriticalErrorsMask;
+#endif
+
+ if (status & TOMAL_SW_NONCRIT_ERRORS_TPDBC) {
+ /* Checksum failed on requested frame. */
+ emac->stats.tx_errors++;
+ } else if (status & TOMAL_SW_NONCRIT_ERRORS_RTSDB) {
+ /* TOMAL has exhausted all the RX buffers. */
+ U32 hwdrops = in_be32(&tomal->regs[0]->rxDroppedFramesCount);
+ emac->stats.rx_dropped += hwdrops;
+ tomal->numberOfHwDrops0 += hwdrops;
+ out_be32(&tomal->regs[0]->rxDroppedFramesCount, 0);
+ emac->stats.rx_errors++;
+#ifndef CONFIG_BGP_E10000_NAPI
+ tomal_poll(tomal->netDev[0], budget);
+#else
+ /* Disable too short Rx buffer interrupt and schedule Rx processing. */
+ /* tomal_poll_napi re-enables RTSDB once it catches up. */
+ swNonCriticalErrorsMask = in_be32(&tomal->regs[0]->swNonCriticalErrorsMask);
+ out_be32(&tomal->regs[0]->swNonCriticalErrorsMask,
+ swNonCriticalErrorsMask & ~TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+ PRINTK(DBG_NAPI, "TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0 pollScheduled=%d\n",pollScheduled);
+ if (!pollScheduled)
+ napi_schedule(&(tomal->napi[0])) ;
+
+#endif
+ }
+ else
+ e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_noncrit_int,
+ "Unknown non-critical SW error [0x%08x].", status);
+
+ /* Clear the interrupt. */
+ out_be32(&tomal->regs[0]->swNonCriticalErrorsStatus, status);
+ rc = IRQ_HANDLED;
+ }
+ if (isr & TOMAL_INTERRUPT_CRITICAL_ERROR) {
+ U32 swStatus = in_be32(&tomal->regs[0]->swCriticalErrorsStatus);
+ U32 hwStatus = in_be32(&tomal->regs[0]->hwErrorsStatus);
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_CRITICAL_ERROR [SW=%08x, HW=%08x]\n",
+ swStatus, hwStatus);
+
+ /* Check for software errors. */
+ if (swStatus & TOMAL_SW_CRIT_ERRORS_TDBC)
+ emac->stats.tx_errors++;
+ else if (swStatus & TOMAL_SW_CRIT_ERRORS_RDBC)
+ emac->stats.rx_errors++;
+ else if (swStatus)
+ e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_critical_int,
+ "Unknown critical SW error [%08x].", swStatus);
+
+ /* Check for hardware errors. */
+ if (hwStatus & (TOMAL_HW_ERRORS_IRAPE | TOMAL_HW_ERRORS_IDBPE))
+ emac->stats.rx_errors++;
+ else if (hwStatus & (TOMAL_HW_ERRORS_ORAPE | TOMAL_HW_ERRORS_ODBPE))
+ emac->stats.tx_errors++;
+ else if (hwStatus)
+ e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_critical_int,
+ "Unknown critical HW error [%08x].", hwStatus);
+
+ /* Clear the interrupt(s). */
+ out_be32(&tomal->regs[0]->hwErrorsStatus, hwStatus);
+ out_be32(&tomal->regs[0]->swCriticalErrorsStatus, swStatus);
+
+ /* Soft reset required here. */
+ tomal_soft_reset(tomal);
+ tomal_irq_enable(tomal, 0);
+
+ rc = IRQ_HANDLED;
+ }
+ if (rc != IRQ_HANDLED) {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq,
+ "Unhandled interrupt - irq=%d, isr=0x%08x, rc=%d",
+ irq, isr, rc);
+ }
+ } else {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq,
+ "Spurious interrupt - irq=%d, isr=0x%08x.",
+ irq, isr);
+ }
+
+ return rc;
+}
+
+/* Handle interrupts for channel 1 (mirror of tomal_irq0 for the second */
+/* EMAC channel; the interrupt status register is shared, so it is read */
+/* from the regs[0] block just as in tomal_irq0). */
+static irqreturn_t tomal_irq1(int irq,
+ void* data)
+{
+ int rc = IRQ_NONE;
+ TOMAL* tomal = (TOMAL*) data;
+ EMAC* emac = (EMAC*) netdev_priv(tomal->netDev[1]);
+ U32 isr = in_be32(&tomal->regs[0]->interruptStatus);
+#ifdef CONFIG_BGP_E10000_NAPI
+ int pollScheduled = 0;
+#endif
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - irq=%d isr=%08x\n", irq, isr);
+
+ if (likely(irq == tomal->irq1)) {
+ if (isr & TOMAL_INTERRUPT_RX1) {
+#ifndef CONFIG_BGP_E10000_NAPI
+ int budget = tomal->maxRxBuffers[1];
+#endif
+ spin_lock(&tomal->rxLock[1]);
+
+#ifdef CONFIG_BGP_E10000_NAPI
+ /* Disable further Rx interrupts. */
+ out_be32(&tomal->regs[1]->rxEventMask, 0);
+
+ /* Schedule Rx processing. */
+ napi_schedule(&(tomal->napi[1])) ;
+ pollScheduled = 1;
+#endif
+
+ /* Clear the RX interrupt. */
+ out_be32(&tomal->regs[1]->rxEventStatus, TOMAL_RX_EVENT);
+
+#ifndef CONFIG_BGP_E10000_NAPI
+ /* Process the buffers then allocate new ones. */
+ rc = tomal_poll(tomal->netDev[1], budget);
+ if (rc != 0)
+ printk(KERN_CRIT "Failure processing RX buffers [%d]\n", rc);
+#endif
+ spin_unlock(&tomal->rxLock[1]);
+ rc = IRQ_HANDLED;
+ }
+ if (isr & TOMAL_INTERRUPT_TX1) {
+ spin_lock(&tomal->txLock[1]);
+
+ /* Clear any TX interrupt. */
+ out_be32(&tomal->regs[1]->txEventStatus, TOMAL_TX_EVENT);
+
+ /* Process the buffers that have been transmitted. */
+ rc = tomal_process_tx_buffers(tomal, 1,
+ in_be32(&tomal->regs[1]->txNumberOfTransmittedFrames) - tomal->numberOfTransmittedFrames[1]);
+ if (rc < 0)
+ printk(KERN_CRIT "Failure processing TX buffers [%d]\n", rc);
+
+ spin_unlock(&tomal->txLock[1]);
+ rc = IRQ_HANDLED;
+ }
+ if (isr & TOMAL_INTERRUPT_TX_MAC_ERROR1) {
+ U32 status = in_be32(&tomal->regs[1]->txMACStatus);
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_TX_MAC_ERROR1 [%08x]\n", status);
+
+ emac->stats.tx_errors++;
+
+ /* Clear the interrupt. */
+ out_be32(&tomal->regs[1]->txMACStatus, status);
+ rc = IRQ_HANDLED;
+ }
+ if (isr & TOMAL_INTERRUPT_RX_MAC_ERROR1) {
+ U32 status = in_be32(&tomal->regs[1]->rxMACStatus);
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL1, "TOMAL_INTERRUPT_RX_MAC_ERROR1 [%08x]\n", status);
+
+ emac->stats.rx_errors++;
+
+ /* Clear the interrupt. */
+ out_be32(&tomal->regs[1]->rxMACStatus, status);
+ rc = IRQ_HANDLED;
+ }
+ /* NOTE(review): this channel-1 handler tests the ..._ERROR0 bit while */
+ /* every other check here uses the channel-1 bit; this looks like a */
+ /* typo for the channel-1 equivalent - confirm against the TOMAL */
+ /* interrupt bit definitions before changing. */
+ if (isr & TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0) {
+ U32 status = in_be32(&tomal->regs[1]->swNonCriticalErrorsStatus);
+#ifndef CONFIG_BGP_E10000_NAPI
+ int budget = tomal->maxRxBuffers[1];
+#else
+ U32 swNonCriticalErrorsMask;
+#endif
+ if (status & TOMAL_SW_NONCRIT_ERRORS_TPDBC)
+ emac->stats.tx_errors++;
+ else if (status & TOMAL_SW_NONCRIT_ERRORS_RTSDB) {
+ /* TOMAL has exhausted all the RX buffers. */
+ U32 hwdrops = in_be32(&tomal->regs[1]->rxDroppedFramesCount);
+ emac->stats.rx_dropped += hwdrops;
+ tomal->numberOfHwDrops1 += hwdrops;
+ out_be32(&tomal->regs[1]->rxDroppedFramesCount, 0);
+ emac->stats.rx_errors++;
+#ifndef CONFIG_BGP_E10000_NAPI
+ tomal_poll(tomal->netDev[1], budget);
+#else
+ /* Disable 'too short Rx buffer' interrupt and schedule Rx processing. */
+ swNonCriticalErrorsMask = in_be32(&tomal->regs[1]->swNonCriticalErrorsMask);
+ out_be32(&tomal->regs[1]->swNonCriticalErrorsMask,
+ swNonCriticalErrorsMask & ~TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+ if (!pollScheduled)
+ napi_schedule(&(tomal->napi[1])) ;
+#endif
+ } else
+ e10000_printr(bg_subcomp_tomal, tomal_ras_unknown_noncrit_int,
+ "Unknown non-critical SW error [0x%08x].", status);
+
+ /* Clear the interrupt. */
+ out_be32(&tomal->regs[1]->swNonCriticalErrorsStatus, status);
+ rc = IRQ_HANDLED;
+ }
+ if (rc != IRQ_HANDLED) {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq,
+ "Unhandled interrupt - irq=%d, isr=0x%08x, rc=%d",
+ irq, isr, rc);
+ }
+ } else {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_spurious_irq,
+ "Spurious interrupt - irq=%d, isr=0x%08x.", irq, isr);
+ }
+
+ return rc;
+}
+
+
+/* Configure TOMAL. */
+/* Programs the global configuration registers, then for each active */
+/* channel allocates the RX/TX descriptor rings and buffers, resets the */
+/* ring bookkeeping, programs the notification timers/counters, and */
+/* (under NAPI) registers and enables the poll context. Returns 0 on */
+/* success or a negative errno from buffer allocation. */
+int tomal_configure(TOMAL* tomal)
+{
+ int rc = 0;
+ int c;
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "entry - tomal=%p\n", tomal);
+
+ out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_RX_MAC0 |
+ TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 |
+ TOMAL_CFG_CTRL_TX_MAC1 | TOMAL_CFG_CTRL_PLB_FREQ_250);
+ out_be32(&tomal->regs[0]->consumerMemoryBaseAddr, 0);
+ out_be32(&tomal->regs[0]->packetDataEngineCtrl, TOMAL_PDE_CTRL_RX_PREFETCH1 |
+ TOMAL_PDE_CTRL_TX_PREFETCH1); /* prefetch 1 descriptor */
+ out_be32(&tomal->regs[0]->interruptRoute, TOMAL_IRQ1_MASK); /* route #1 ints to TOE_PLB_INT[1] */
+ for (c = 0; c < TOMAL_MAX_CHANNELS; c++)
+ if (tomal->netDev[c]) {
+ /* Allocate RX descriptors. */
+ rc = tomal_alloc_rx_segments(tomal, c, tomal->maxRxBuffers[c]);
+ if (rc) {
+ /* Failure allocating requested descriptors. */
+ /* Treated as fatal: BUG_ON panics rather than unwinding. */
+ BUG_ON(rc);
+ }
+
+ /* Allocate RX buffers and initialize RX descriptor info. */
+ tomal->oldRxSegment[c] = tomal->rxDescSegment[c];
+
+ /* tomal_alloc_rx_buffers returns the buffer count on success */
+ /* (<= 0 means failure); normalize success back to rc = 0. */
+ rc = tomal_alloc_rx_buffers(tomal, c);
+ if (rc <= 0) {
+ if (c && tomal->netDev[0])
+ tomal_free_rx_buffers(tomal, 0);
+ break;
+ }
+ else
+ rc = 0;
+
+ /* Allocate TX descriptors and initialize TX descriptor info. */
+ rc = tomal_alloc_tx_segments(tomal, c, tomal->maxTxBuffers[c]);
+ if (rc) {
+ /* Failure allocating requested descriptors. */
+ printk(KERN_CRIT "Failure allocating %d TX descriptors.\n", tomal->maxTxBuffers[c]);
+ BUG_ON(rc);
+ }
+ /* Reset the TX ring bookkeeping: both cursors point at the */
+ /* ring head and all indices/frame counters start at zero. */
+ tomal->pendingTxBuffers[c] = 0;
+ tomal->oldTxSegment[c] = tomal->freeTxSegment[c] = tomal->txDescSegment[c];
+ tomal->freeTxSegment[c]->freeIndex = tomal->freeTxSegment[c]->oldIndex =
+ tomal->numberOfTransmittedFrames[c] = tomal->numberOfReceivedFrames[c] = 0;
+
+ /* Initialize the timers and counters. */
+ out_be32(&tomal->regs[c]->txMinTimer, 255);
+ out_be32(&tomal->regs[c]->txMaxTimer, 255);
+ out_be32(&tomal->regs[c]->txMaxFrameNum, tomal->maxTxBuffers[c]);
+ out_be32(&tomal->regs[c]->txMinFrameNum, 255);
+ out_be32(&tomal->regs[c]->rxMinTimer, 255);
+ out_be32(&tomal->regs[c]->rxMaxTimer, 22);
+ out_be32(&tomal->regs[c]->rxMinFrameNum, 255);
+#ifdef CONFIG_BGP_E10000_NAPI
+ /* Interrupt sooner under NAPI; the poll loop batches the rest. */
+ out_be32(&tomal->regs[c]->rxMaxFrameNum, 4);
+#else
+ out_be32(&tomal->regs[c]->rxMaxFrameNum, 64);
+#endif
+
+ /* Initialize spinlocks. */
+ spin_lock_init(&tomal->rxLock[c]);
+ spin_lock_init(&tomal->txLock[c]);
+
+#ifdef CONFIG_BGP_E10000_NAPI
+ netif_napi_add(tomal->netDev[c],&(tomal->napi[c]),tomal_poll_napi,tomal->maxRxBuffers[c]) ;
+ napi_enable(&(tomal->napi[c])) ;
+#endif
+ }
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "exit - rc=%d\n", rc);
+
+ return rc;
+}
+
+
+
+/* Allocate and initialize the TOMAL device object. */
+/* Maps the register blocks, registers both interrupt lines, creates the */
+/* /proc/driver/e10000/tomal hierarchy, records per-channel buffer sizing, */
+/* and soft-resets the device. Returns the TOMAL pointer on success or */
+/* ERR_PTR(-errno) on failure. */
+TOMAL* __init
+tomal_init(void* devMapAddr,
+ struct net_device* netDev0,
+ U32 rxTotalBufferSize0,
+ U32 numTxBuffers0,
+ struct net_device* netDev1,
+ U32 rxTotalBufferSize1,
+ U32 numTxBuffers1,
+ int irq0,
+ int irq1,
+ struct proc_dir_entry* procDir)
+{
+ TOMAL* tomal;
+ int rc = 0;
+ int c;
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - netDev0=%p, rxTotalBufferSize0=%d, "
+ "numTxBuffers0=%d, netDev1=%p, rxTotalBufferSize1=%d, "
+ "numTxBuffers1=%d, irq0=%d, irq1=%d, procDir=%p\n", netDev0, rxTotalBufferSize0,
+ numTxBuffers0, netDev1, rxTotalBufferSize1, numTxBuffers1, irq0, irq1, procDir);
+
+ /* Allocate tomal object. */
+ tomal = kmalloc(sizeof(TOMAL), GFP_KERNEL);
+ if (!tomal) {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error,
+ "Failure allocating TOMAL device.");
+ rc = -ENOMEM;
+ goto end;
+ }
+ memset((void*) tomal, 0, sizeof(*tomal));
+
+ /* Map the TOMAL registers. */
+ tomal->regs[0] = (TOMALRegs*) devMapAddr;
+ if (!tomal->regs[0]) {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_ioremap_error,
+ "Failure maping TOMAL registers.");
+ rc = -ENXIO;
+ goto free_tomal;
+ }
+
+ /* Setup a register mapping for the second channel. The registers that */
+ /* are specific to the second channel are located 0x100 bytes past the */
+ /* registers specific to the first channel. Use this mapping for */
+ /* channel 1 specific registers only! */
+ tomal->regs[1] = (TOMALRegs*) ((U8*) tomal->regs[0]) + 0x100;
+
+ /* Register interrupt handlers. TOMAL has two interrupt lines. */
+ tomal->irq0 = irq0;
+ tomal->irq1 = irq1;
+ rc = request_irq(tomal->irq0, tomal_irq0, IRQF_DISABLED, "TOMAL IRQ0", (void*) tomal);
+ if (!rc) {
+ rc = request_irq(tomal->irq1, tomal_irq1, IRQF_DISABLED, "TOMAL IRQ1", (void*) tomal);
+ if (rc) {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_irq_unavailable,
+ "Unable to register IRQ - irq1=0x%08x.", irq1);
+ /* Only irq0 was registered; release it exactly once and */
+ /* skip any shared IRQ-freeing path (irq1 was never */
+ /* acquired, so freeing it here would be a bug). */
+ free_irq(tomal->irq0, (void*) tomal);
+ goto unmap_tomal_regs;
+ }
+ } else {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_irq_unavailable,
+ "Unable to register IRQ - irq0=0x%08x.", irq0);
+ goto unmap_tomal_regs;
+ }
+
+ /* Create /proc/driver/e10000/tomal directory. */
+ tomal->parentDir = procDir;
+ if (procDir) {
+ tomal->tomalDir = proc_mkdir("tomal", procDir);
+ if (tomal->tomalDir) {
+ tomal->hwDir = proc_mkdir("hw", tomal->tomalDir);
+ if (tomal->hwDir) {
+ E10000_PROC_ENTRY* entry = tomal_hw_proc_entry;
+
+ /* Each hw entry exposes a register at a fixed offset */
+ /* from the register base. */
+ while (entry->name) {
+ entry->entry = e10000_create_proc_entry(tomal->hwDir, entry->name, (void*)
+ ((U32) entry->addr + (U32) tomal->regs[0]));
+ entry++;
+ }
+ }
+ tomal->swDir = proc_mkdir("sw", tomal->tomalDir);
+ if (tomal->swDir) {
+ tomal_sw_proc_entry[0].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[0].name,
+ (void*) &tomal->maxRxBuffers[0]);
+ tomal_sw_proc_entry[1].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[1].name,
+ (void*) &tomal->maxRxBuffers[1]);
+ tomal_sw_proc_entry[2].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[2].name,
+ (void*) &tomal->rxBufferSize[0]);
+ tomal_sw_proc_entry[3].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[3].name,
+ (void*) &tomal->rxBufferSize[1]);
+ tomal_sw_proc_entry[4].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[4].name,
+ (void*) &tomal->rxDescSegment[0]);
+ tomal_sw_proc_entry[5].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[5].name,
+ (void*) &tomal->rxDescSegment[1]);
+ tomal_sw_proc_entry[6].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[6].name,
+ (void*) &tomal->oldRxSegment[0]);
+ tomal_sw_proc_entry[7].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[7].name,
+ (void*) &tomal->oldRxSegment[1]);
+ tomal_sw_proc_entry[8].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[8].name,
+ (void*) &tomal->maxTxBuffers[0]);
+ tomal_sw_proc_entry[9].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[9].name,
+ (void*) &tomal->maxTxBuffers[1]);
+ tomal_sw_proc_entry[10].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[10].name,
+ (void*) &tomal->pendingTxBuffers[0]);
+ tomal_sw_proc_entry[11].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[11].name,
+ (void*) &tomal->pendingTxBuffers[1]);
+ tomal_sw_proc_entry[12].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[12].name,
+ (void*) &tomal->numberOfTransmittedFrames[0]);
+ tomal_sw_proc_entry[13].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[13].name,
+ (void*) &tomal->numberOfTransmittedFrames[1]);
+ tomal_sw_proc_entry[14].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[14].name,
+ (void*) &tomal->txDescSegment[0]);
+ tomal_sw_proc_entry[15].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[15].name,
+ (void*) &tomal->txDescSegment[1]);
+ tomal_sw_proc_entry[16].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[16].name,
+ (void*) &tomal->oldTxSegment[0]);
+ tomal_sw_proc_entry[17].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[17].name,
+ (void*) &tomal->oldTxSegment[1]);
+ tomal_sw_proc_entry[18].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[18].name,
+ (void*) &tomal->freeTxSegment[0]);
+ tomal_sw_proc_entry[19].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[19].name,
+ (void*) &tomal->freeTxSegment[1]);
+ tomal_sw_proc_entry[20].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[20].name,
+ (void*) &tomal->irq0);
+ tomal_sw_proc_entry[21].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[21].name,
+ (void*) &tomal->irq1);
+ tomal_sw_proc_entry[22].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[22].name,
+ (void*) &tomal->numberOfNetrxDrops);
+ tomal_sw_proc_entry[23].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[23].name,
+ (void*) &tomal->numberOfHwDrops0);
+ tomal_sw_proc_entry[24].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[24].name,
+ (void*) &tomal->numberOfHwDrops1);
+ tomal_sw_proc_entry[25].entry =
+ e10000_create_proc_entry(tomal->swDir, tomal_sw_proc_entry[25].name,
+ (void*) &tomal->numberOfNotLast);
+ }
+ }
+ }
+
+ /* For each configured channel allocate descriptor segments and perform other initialization. */
+ tomal->netDev[0] = netDev0;
+ if (netDev0) {
+ /* Jumbo-frame sized Rx buffers: 9000 payload + header + FCS. */
+ tomal->rxBufferSize[0] = 9000 + ETH_HLEN + BGP_E10000_FCS_SIZE;
+ tomal->maxRxBuffers[0] = (rxTotalBufferSize0 <= TOMAL_RX_TOTAL_BUFFER_SIZE_MAX ? rxTotalBufferSize0 :
+ TOMAL_RX_TOTAL_BUFFER_SIZE_MAX) / tomal->rxBufferSize[0] ;
+ tomal->maxTxBuffers[0] = numTxBuffers0;
+ }
+ tomal->netDev[1] = netDev1;
+ if (netDev1) {
+ tomal->rxBufferSize[1] = 9000 + ETH_HLEN + BGP_E10000_FCS_SIZE;
+ tomal->maxRxBuffers[1] = (rxTotalBufferSize1 <= TOMAL_RX_TOTAL_BUFFER_SIZE_MAX ? rxTotalBufferSize1 :
+ TOMAL_RX_TOTAL_BUFFER_SIZE_MAX) / tomal->rxBufferSize[1];
+ tomal->maxTxBuffers[1] = numTxBuffers1;
+ }
+ for (c = 0; c < TOMAL_MAX_CHANNELS; c++) {
+ if (tomal->netDev[c]) {
+#ifdef CONFIG_BGP_E10000_IP_CHECKSUM
+ /* Tell the network stack that TOMAL performs IP checksum and */
+ /* that it can handle the transmission of scatter/gather data. */
+ tomal->netDev[c]->features |= (NETIF_F_SG | NETIF_F_IP_CSUM);
+#endif
+ tomal->netDev[c]->features |= (NETIF_F_HIGHDMA | NETIF_F_LLTX);
+
+ }
+ }
+ tomal_soft_reset(tomal);
+
+ goto end;
+
+unmap_tomal_regs:
+ /* The mapping is owned by the caller; just drop our reference. */
+ tomal->regs[0] = NULL;
+
+free_tomal:
+ kfree((void*) tomal);
+
+end:
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+ return (rc ? ERR_PTR(rc) : tomal);
+}
+
+
+/* Allocate an SKB for each Rx descriptor that doesn't already reference one. */
+/* Walks the channel's circular descriptor segment list, attaches a DMA-mapped */
+/* skb to every descriptor with postedLength == 0, then credits the total */
+/* bytes to the device in chunks of at most 64KB. Returns the number of */
+/* buffers allocated (0 or more). */
+int tomal_alloc_rx_buffers(TOMAL* tomal,
+ U8 channel)
+{
+ int rc = 0;
+ RxDescSegment* segment;
+ RxDesc* desc;
+ RxDesc* startDesc;
+ struct sk_buff** skb;
+ U32 bytesAlloced = 0;
+ U32 buffersAlloced = 0;
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p channel=%d\n", tomal, channel);
+
+ segment = tomal->rxDescSegment[channel];
+ desc = segment->desc;
+ startDesc = desc;
+ skb = segment->skb;
+
+ /* Iterate over all descriptors and allocate a buffer to any */
+ /* descriptors that don't already point to a buffer. */
+ do {
+ /* Have we reached the end of the segment? */
+ if (desc == (RxDesc*) segment->branchDesc) {
+ /* Move the descriptor segment pointer to the next segment. */
+ segment = segment->next;
+ desc = segment->desc;
+ skb = segment->skb;
+ if (desc == startDesc)
+ /* We've been through all descriptors. */
+ break;
+ }
+
+ /* If this descriptor is unused then allocate a buffer here. */
+ if (!desc->postedLength) {
+ /* Allocate a buffer. */
+ /* +16 gives headroom for the 2-byte IP-alignment reserve. */
+ *skb = alloc_skb(tomal->rxBufferSize[channel] + 16, GFP_ATOMIC);
+ if (*skb) {
+ skb_reserve(*skb, 2);
+
+ /* Point a descriptor at the buffer. */
+ desc->code = TOMAL_RX_DESC_CODE;
+ desc->postedLength = tomal->rxBufferSize[channel];
+ desc->status = 0;
+ desc->totalFrameLength = 0;
+ desc->buffHeadAddrH = 0;
+ desc->buffHeadAddrL =
+ dma_map_single(NULL, (*skb)->data,
+ desc->postedLength,
+ DMA_FROM_DEVICE);
+ BUG_ON(!desc->buffHeadAddrL);
+
+ bytesAlloced += desc->postedLength;
+ buffersAlloced++;
+ } else {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_alloc_error,
+ "Failure allocating SKB.");
+ break;
+ }
+ }
+
+ /* Advance to the next descriptor and buffer. */
+ desc++;
+ skb++;
+ } while (desc != startDesc);
+
+ /* Now tell TOMAL about all the buffers allocated. */
+ /* We can add up to 64K at a time for a maximum total of 1MB. */
+ while (bytesAlloced) {
+ U32 size = (bytesAlloced <= 0xffff ? bytesAlloced : 0xffff);
+
+ BUG_ON(in_be32(&tomal->regs[channel]->rxTotalBufferSize) + size > 0x100000);
+ out_be32(&tomal->regs[channel]->rxAddFreeBytes, size);
+ bytesAlloced -= size;
+ }
+
+ /* rc is still 0 here, so this returns the buffer count. */
+ rc = (rc ? rc : buffersAlloced);
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+ return rc;
+}
+
+
+/* Receive frames until the indicated number of frames have been received or there are no more */
+/* frames available. */
+/* Compiled as tomal_poll_napi (NAPI poll callback, budget-limited) or as */
+/* tomal_poll (called inline from the IRQ handlers). Delivers completed */
+/* frames to the stack, refills the ring with fresh skbs, and re-credits */
+/* buffer bytes to the device. NAPI variant returns 0 when done (and */
+/* re-enables interrupts) or 1 when more work remains. */
+#if defined(CONFIG_BGP_E10000_NAPI)
+int tomal_poll_napi(struct napi_struct * napi, int budget) /* struct net_device* netDev, int* budget) */
+{
+ struct net_device *netDev = napi->dev ;
+#else
+int tomal_poll(struct net_device *netDev, int budget) /* struct net_device* netDev, int* budget) */
+{
+#endif
+ int rc;
+ EMAC* emac = (EMAC*) netdev_priv(netDev);
+ TOMAL* tomal = emac->tomal;
+ U8 channel = emac->channel;
+ RxDescSegment* segment = tomal->oldRxSegment[channel];
+ register RxDesc* desc = segment->currDesc;
+ register struct sk_buff** skb = segment->currSkb;
+ register const U32 buffLen = tomal->rxBufferSize[channel];
+ register const U32 skbSize = buffLen + 16;
+ register U32 rxNumberOfReceivedFrames = in_be32(&tomal->regs[channel]->rxNumberOfReceivedFrames);
+ register U32 framesToProcess = rxNumberOfReceivedFrames - tomal->numberOfReceivedFrames[channel];
+ register U32 framesReceived = 0;
+ register U32 bytesPosted = 0;
+ register int quota = min(budget, (int) framesToProcess);
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "entry - netDev=%p, budget=%d\n", netDev, budget);
+
+/* #ifdef CONFIG_BGP_E10000_NAPI */
+/* // Determine receive quota. */
+/* if (quota > netDev->quota) */
+/* quota = netDev->quota; */
+/* #endif */
+
+ /* Iterate over the RX descriptors, starting with the oldest, processing each */
+ /* data buffer that has been received until the indicated number of frames */
+ /* have been processed. */
+ while (likely((framesReceived < quota) && framesToProcess)) {
+ /* Is the current descriptor describing a valid frame? */
+ if (likely(desc->status & TOMAL_RX_LAST)) {
+ PRINTK(DBG_TOMAL | DBG_LEVEL3 | DBG_NAPI, "Received %d bytes to skb %p\n", desc->totalFrameLength, *skb);
+ if (likely((desc->status & TOMAL_RX_STATUS_CHECKSUM_VALID) &&
+ (desc->status & TOMAL_RX_STATUS_IP_CHECKSUM_PASSED) &&
+ (desc->status & TOMAL_RX_STATUS_TCP_UDP_CHECKSUM_PASSED)))
+ /* Valid checksum. */
+ (*skb)->ip_summed = CHECKSUM_UNNECESSARY;
+ else
+ (*skb)->ip_summed = CHECKSUM_NONE;
+ skb_put(*skb, desc->totalFrameLength);
+ (*skb)->dev = netDev;
+ (*skb)->protocol = eth_type_trans(*skb, netDev);
+#ifdef CONFIG_BGP_E10000_NAPI
+ PRINTK(DBG_NAPI, "netif_receive_skb\n");
+ rc = netif_receive_skb(*skb);
+#else
+ rc = netif_rx(*skb);
+#endif
+ /* Ownership of the skb passed to the stack in all cases. */
+ *skb = NULL;
+ if (likely(rc == NET_RX_SUCCESS)) {
+ framesReceived++;
+ emac->stats.rx_bytes += desc->totalFrameLength;
+ } else if (rc == NET_RX_DROP || rc == NET_RX_BAD) {
+ emac->stats.rx_dropped++;
+ tomal->numberOfNetrxDrops ++ ;
+ } else
+ emac->stats.rx_errors++;
+ } else {
+ /* Descriptor consumed by hardware but not a frame-final one. */
+ tomal->numberOfNotLast++ ;
+ }
+
+ /* Make the current slot in the Rx ring useable again. */
+ /* If the skb went to the stack, allocate and map a replacement; */
+ /* otherwise the existing buffer is simply reposted below. */
+ if (likely(*skb == NULL)) {
+ *skb = alloc_skb(skbSize, GFP_ATOMIC);
+ if (likely(*skb)) {
+ skb_reserve(*skb, 2); /* align */
+ desc->buffHeadAddrL = dma_map_single(NULL, (*skb)->data, buffLen, DMA_FROM_DEVICE);
+ desc->postedLength = buffLen;
+ bytesPosted += desc->postedLength;
+ } else
+ desc->postedLength = desc->buffHeadAddrL = 0;
+ } else /* Reinitialize this descriptor */
+ bytesPosted += desc->postedLength; /* descriptor avaialable again so repost */
+ desc->status = 0;
+
+ /* Post additional buffers to the device if we've accumulated enough. */
+ /* rxAddFreeBytes accepts at most 64K-1 per write. */
+ if (unlikely(bytesPosted >= 0xffff)) {
+ out_be32(&tomal->regs[channel]->rxAddFreeBytes, 0xffff);
+ bytesPosted -= 0xffff;
+ }
+
+ skb++;
+ desc++;
+ framesToProcess--;
+
+ /* Have we reached the end of the segment? */
+ /* Detected by running onto the branch descriptor, whose code */
+ /* field differs from TOMAL_RX_DESC_CODE. */
+ if (unlikely(desc->code != TOMAL_RX_DESC_CODE)) {
+ /* Move to the next segment. */
+ segment->currDesc = segment->desc;
+ segment->currSkb = segment->skb;
+ tomal->oldRxSegment[channel] = segment = segment->next;
+ desc = segment->currDesc;
+ skb = segment->currSkb;
+ }
+ }
+
+ /* Post any remaining buffers to the device. */
+ if (likely(bytesPosted))
+ out_be32(&tomal->regs[channel]->rxAddFreeBytes, bytesPosted);
+
+ /* Update segment information and statistics. */
+ segment->currDesc = desc;
+ segment->currSkb = skb;
+ emac->stats.rx_packets += framesReceived;
+ tomal->numberOfReceivedFrames[channel] = rxNumberOfReceivedFrames - framesToProcess;
+
+ /* Reset the Rx notification mechanism. */
+ out_be32(&tomal->regs[0]->rxNotificationCtrl, (channel ? TOMAL_RX_NOTIFY_CTRL_COUNTER_START1 : TOMAL_RX_NOTIFY_CTRL_COUNTER_START0));
+
+#ifdef CONFIG_BGP_E10000_NAPI
+/* netDev->quota -= framesReceived; */
+ budget -= framesReceived;
+ if (framesReceived == quota) {
+ /* We processed all frames within the specified quota. Reenable interrupts */
+ /* and tell the kernel that we received everything available. */
+ /* Also re-arm the RTSDB ('Rx buffers exhausted') source that the */
+ /* IRQ handler may have masked while scheduling us. */
+ U32 swNonCriticalErrorsMask = in_be32(&tomal->regs[0]->swNonCriticalErrorsMask);
+ PRINTK(DBG_NAPI, "napi_complete\n");
+ napi_complete(napi) ;
+ out_be32(&tomal->regs[channel]->rxEventMask, TOMAL_RX_EVENT);
+ if (!(swNonCriticalErrorsMask & TOMAL_SW_NONCRIT_ERRORS_RTSDB))
+ out_be32(&tomal->regs[0]->swNonCriticalErrorsMask,
+ swNonCriticalErrorsMask | TOMAL_SW_NONCRIT_ERRORS_RTSDB);
+ rc = 0;
+ } else
+ rc = 1;
+#else
+ rc = 0;
+#endif
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2 | DBG_NAPI, "exit - rc=%d\n", rc);
+
+ return rc;
+}
+
+static inline U16 * frame_checksum_ptr(struct sk_buff* skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ unsigned int eth_proto = eth->h_proto ;
+ struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+ unsigned int iphlen = 4*iph->ihl ;
+ struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) );
+ struct udphdr *udph = (struct udphdr *) ( ((char *)(iph)) + (iphlen) );
+ unsigned int ip_proto = iph->protocol ;
+ if( eth_proto == ETH_P_IP) {
+ if( ip_proto == IPPROTO_TCP) return &(tcph->check) ;
+ if( ip_proto == IPPROTO_UDP) return &(udph->check) ;
+ }
+ return NULL ;
+
+}
+/* Transmit a frame. */
+/* Caller should be holding the TOMAL lock for the specified channel. */
+int tomal_xmit_tx_buffer(TOMAL* tomal,
+ U8 channel,
+ struct sk_buff* skb)
+{
+ int rc = 0;
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ int f = -1;
+ TxDescSegment* segment = tomal->freeTxSegment[channel];
+ U32 framesToProcess;
+ U32 buffLen;
+ dma_addr_t buffAddr;
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p, skb=%p, channel=%d\n", tomal, skb, channel);
+
+ do {
+ /* Are we at the end of the segment? */
+ if (unlikely(segment->desc[segment->freeIndex].code == 0x20)) {
+ segment->freeIndex = 0;
+ tomal->freeTxSegment[channel] = segment = segment->next;
+ }
+
+ /* Point the next free descriptor(s) at the SKB buffer(s). The first buffer is a special case. */
+ if (f < 0) {
+ /* The data is in the skb's data buffer. */
+ buffLen = skb->len - skb->data_len;
+ buffAddr = dma_map_single(NULL, skb->data, buffLen, DMA_TO_DEVICE);
+/* tomal->count_tx_checksum_type[skb->ip_summed] += 1 ; */
+#ifdef CONFIG_BGP_E10000_IP_CHECKSUM
+ /* When using the IO node as a router (collective --> ethernet ) frames are coming across marked CHECKSUM_COMPLETE */
+ /* even though I think they should be marked CHECKSUM_PARTIAL. Use the TOMAL checksumming hardware on the frames. */
+/* if (skb->ip_summed == CHECKSUM_PARTIAL) */
+ if( 1)
+ {
+ /* Generate IP checksum for this frame. */
+ U16 * frame_ck_ptr=frame_checksum_ptr(skb) ;
+ if( frame_ck_ptr ) *frame_ck_ptr = 0 ;
+/* if( frame_ck_ptr && frame_ck_ptr != (U16*)(skb->head+skb->csum_start + skb->csum_offset)) */
+/* { */
+/* printk(KERN_INFO "(E) frame_ck_ptr=%p skb->head=%p skb->csum_start=%d skb->csum_offset=%d\n", */
+/* frame_ck_ptr,skb->head,skb->csum_start,skb->csum_offset) ; */
+/* } */
+/* *(U16*)(skb->head+skb->csum_start + skb->csum_offset) = 0; */
+ segment->desc[segment->freeIndex].command = TOMAL_TX_ENABLE_HW_CHECKSUM |
+ TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD;
+ } else {
+ segment->desc[segment->freeIndex].command = TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD;
+ }
+#else
+ segment->desc[segment->freeIndex].command = TOMAL_TX_GENERATE_FCS | TOMAL_TX_GENERATE_PAD;
+#endif
+
+ } else {
+ struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f];
+
+ /* Map the page that contains the current fragment. */
+ buffAddr = dma_map_page(NULL, frag->page, frag->page_offset, frag->size, DMA_TO_DEVICE);
+ buffLen = frag->size;
+ }
+
+ segment->desc[segment->freeIndex].wBStatus = 0;
+ segment->desc[segment->freeIndex].postedLength = buffLen;
+ segment->desc[segment->freeIndex].buffHeadAddrL = (U32) buffAddr;
+ segment->desc[segment->freeIndex].code = TOMAL_TX_DESC_CODE;
+ if (f == (nr_frags - 1)) { /* Last buffer? */
+ segment->desc[segment->freeIndex].code |= TOMAL_TX_NOTIFY_REQ | TOMAL_TX_SIGNAL | TOMAL_TX_LAST;
+ segment->skb[segment->freeIndex] = skb;
+
+ /* Post buffer(s) for transmission. */
+ PRINTK(DBG_TOMAL | DBG_LEVEL3, "Enqueueing buffer 0x%08x for xmit, index=%d, desc=%p, len=%d, code=0x%x\n",
+ (U32) buffAddr, segment->freeIndex, &segment->desc[segment->freeIndex], segment->desc[segment->freeIndex].postedLength,
+ segment->desc[segment->freeIndex].code);
+ smp_wmb();
+ out_be32(&tomal->regs[channel]->txAddPostedFrames, 1);
+ }
+
+ /* Advance to the next free descriptor index. */
+ segment->freeIndex++;
+ f++;
+ } while (f < nr_frags);
+ tomal->pendingTxBuffers[channel] += f+1;
+
+ /* Clean up any buffers for frames that have been transmitted. */
+ framesToProcess = in_be32(&tomal->regs[channel]->txNumberOfTransmittedFrames) - tomal->numberOfTransmittedFrames[channel];
+ if (unlikely(framesToProcess > 32)) {
+ int bufsProcessed = tomal_process_tx_buffers(tomal, channel, framesToProcess);
+ if (unlikely(bufsProcessed < 0))
+ printk(KERN_WARNING "%s: Error processing TX buffers [%d]\n",
+ tomal->netDev[channel]->name, bufsProcessed);
+ }
+
+ /* Stop the queue if we lack the space to transmit another frame. */
+ if (unlikely((tomal->pendingTxBuffers[channel] + MAX_SKB_FRAGS + 1) >
+ tomal->maxTxBuffers[channel]))
+ netif_stop_queue(tomal->netDev[channel]);
+
+ tomal->netDev[channel]->trans_start = jiffies;
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+ return rc;
+}
+
+
+
+void tomal_exit(TOMAL* tomal)
+{
+ int c;
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry\n");
+
+ if (tomal) {
+ /* Release interrupt handlers. */
+ free_irq(TOMAL_IRQ0, tomal);
+ free_irq(TOMAL_IRQ1, tomal);
+
+ /* Free descriptor segments for each channel. */
+ for (c = 0; c < TOMAL_MAX_CHANNELS; c++) {
+ tomal_free_rx_segments(tomal, c);
+ tomal_free_tx_segments(tomal, c);
+
+ /* Unregister and free net_device */
+ if (tomal->netDev[c]) {
+ EMAC* emac = netdev_priv(tomal->netDev[c]);
+
+ /* Allow EMAC to cleanup. */
+ if (emac)
+ emac_exit(emac);
+
+ unregister_netdev(tomal->netDev[c]);
+ free_netdev(tomal->netDev[c]);
+ }
+ }
+
+ /* Remove /proc entries. */
+ if (tomal->tomalDir) {
+ if (tomal->hwDir) {
+ E10000_PROC_ENTRY* entry = tomal_hw_proc_entry;
+
+ while (entry->name) {
+ if (entry->entry) {
+ remove_proc_entry(entry->entry->name, tomal->hwDir);
+ entry->entry = NULL;
+ }
+ entry++;
+ }
+
+ remove_proc_entry(tomal->hwDir->name, tomal->tomalDir);
+ tomal->hwDir = NULL;
+ }
+ if (tomal->swDir) {
+ E10000_PROC_ENTRY* entry = tomal_sw_proc_entry;
+ while (entry->name) {
+ if (entry->entry) {
+ remove_proc_entry(entry->entry->name, tomal->swDir);
+ entry->entry = NULL;
+ }
+ entry++;
+ }
+
+ remove_proc_entry(tomal->swDir->name, tomal->tomalDir);
+ tomal->swDir = NULL;
+ }
+
+ remove_proc_entry(tomal->tomalDir->name, tomal->parentDir);
+ tomal->tomalDir = NULL;
+ }
+
+ /* Free the TOMAL object. */
+ kfree((void*) tomal);
+ }
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit\n");
+
+ return;
+}
+
+
+/* Reset and reconfigure the TOMAL hardware and reinitialize Rx descriptors. */
+int tomal_soft_reset(TOMAL* tomal)
+{
+ int rc = 0;
+ int c;
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "entry - tomal=%p\n", tomal);
+
+ /* Reset TOMAL and wait for it to finish. */
+ out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_SOFT_RESET);
+ for (c = 100; (in_be32(&tomal->regs[0]->configurationCtrl) & TOMAL_CFG_CTRL_SOFT_RESET) && c; c--)
+ udelay(10000);
+ if (!c) {
+ e10000_printr(bg_subcomp_tomal, tomal_ras_timeout,
+ "TOMAL reset failure.");
+ rc = -ETIME;
+ } else {
+ /* Reset EMAC(s) and free any buffers. */
+ for (c = 0; c < TOMAL_MAX_CHANNELS; c++)
+ if (tomal->netDev[c]) {
+ /* Free any RX and TX buffers. */
+ tomal_free_rx_buffers(tomal, c);
+ tomal_free_tx_buffers(tomal, c);
+
+ /* Free descriptor segments */
+ tomal_free_rx_segments(tomal, c);
+ tomal_free_tx_segments(tomal, c);
+ }
+
+ /* Reconfigure TOMAL. */
+ rc = tomal_configure(tomal);
+ }
+
+ PRINTK(DBG_TOMAL | DBG_LEVEL2, "exit - rc=%d\n", rc);
+
+ return rc;
+}
diff --git a/drivers/net/bgp_e10000/bgp_tomal.h b/drivers/net/bgp_e10000/bgp_tomal.h
new file mode 100644
index 00000000000000..d45ef5813793c9
--- /dev/null
+++ b/drivers/net/bgp_e10000/bgp_tomal.h
@@ -0,0 +1,423 @@
+/*
+ * bgp_tomal.h: Definition of TOMAL device for BlueGene/P 10 GbE driver
+ *
+ * Copyright (c) 2007, 2010 International Business Machines
+ * Author: Andrew Tauferner <ataufer@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ */
+
+#ifndef _BGP_TOMAL_H
+#define _BGP_TOMAL_H
+
+#include <asm/io.h>
+#include <asm/bluegene.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+
+#include "bgp_e10000.h"
+
+#define TOMAL_MAX_CHANNELS 2
+
+
+#define TOMAL_RX_MAX_FRAME_NUM 10
+#define TOMAL_RX_MAX_TIMER 50
+
+
+#define TOMAL_IRQ_GROUP 8
+#define TOMAL_IRQ0_GINT 0
+#define TOMAL_IRQ1_GINT 1
+#define TOMAL_IRQ0 bic_hw_to_irq(TOMAL_IRQ_GROUP, TOMAL_IRQ0_GINT)
+#define TOMAL_IRQ1 bic_hw_to_irq(TOMAL_IRQ_GROUP, TOMAL_IRQ1_GINT)
+
+
+#define TOMAL_BASE_ADDRESS 0x720000000ULL
+typedef volatile struct _TOMALRegs {
+ U32 configurationCtrl; /* 0000 configuration control */
+#define TOMAL_CFG_CTRL_RX_MAC0 0x00800000
+#define TOMAL_CFG_CTRL_RX_MAC1 0x00400000
+#define TOMAL_CFG_CTRL_TX_MAC0 0x00200000
+#define TOMAL_CFG_CTRL_TX_MAC1 0x00100000
+#define TOMAL_CFG_CTRL_PLB_FREQ_250 0x00000000
+#define TOMAL_CFG_CTRL_PLB_FREQ_300 0x00040000
+#define TOMAL_CFG_CTRL_PLB_FREQ_350 0x00080000
+#define TOMAL_CFG_CTRL_PLB_FREQ_400 0x000c0000
+#define TOMAL_CFG_CTRL_PLB_M_POWER 0x00000080
+#define TOMAL_CFG_CTRL_SLEEP 0x00000002
+#define TOMAL_CFG_CTRL_SOFT_RESET 0x00000001
+ U32 reserved1[23]; /* 0004 */
+ U32 revisionID; /* 0060 revision id */
+ U32 reserved2[103]; /* 0064 */
+ U32 consumerMemoryBaseAddr; /* 0200 consumer memory base address */
+ U32 reserved3[127]; /* 0204 */
+ U32 packetDataEngineCtrl; /* 0400 packet data engine control */
+#define TOMAL_PDE_CTRL_RX_PREFETCH8 0x00000030
+#define TOMAL_PDE_CTRL_RX_PREFETCH1 0x00000000
+#define TOMAL_PDE_CTRL_TX_PREFETCH8 0x00000003
+#define TOMAL_PDE_CTRL_TX_PREFETCH1 0x00000000
+ U32 reserved4[127]; /* 0404 */
+ U32 txNotificationCtrl; /* 0600 TX notification control */
+#define TOMAL_TX_NOTIFY_CTRL_COUNTER_START0 0x00000020
+#define TOMAL_TX_NOTIFY_CTRL_COUNTER_START1 0x00000010
+ U32 reserved5[3]; /* 0604 */
+ U32 txMinTimer; /* 0610 TX min timer */
+ U32 reserved6[3]; /* 0614 */
+ U32 txMaxTimer; /* 0620 TX max timer */
+ U32 reserved7[11]; /* 0624 */
+ U32 txFramePerServiceCtrl; /* 0650 TX frame / service control */
+ U32 reserved8[3]; /* 0654 */
+ U32 txHWCurrentDescriptorAddrH; /* 0660 TX HW current desc. addr. High */
+ U32 reserved9[3]; /* 0664 */
+ U32 txHWCurrentDescriptorAddrL; /* 0670 TX HW current desc. addr. Low */
+ U32 reserved10[7]; /* 0674 */
+ U32 txPendingFrameCount; /* 0690 TX pending frame count */
+#define TOMAL_MAX_TX_PENDING_FRAMES 216
+ U32 reserved11[3]; /* 0694 */
+ U32 txAddPostedFrames; /* 06A0 TX add posted frames */
+ U32 reserved12[3]; /* 06A4 */
+ U32 txNumberOfTransmittedFrames; /* 06B0 TX number transmitted frames */
+ U32 reserved13[3]; /* 06B4 */
+ U32 txMaxFrameNum; /* 06C0 TX max frame number */
+ U32 reserved14[3]; /* 06C4 */
+ U32 txMinFrameNum; /* 06D0 TX min frame number */
+ U32 reserved15[3]; /* 06D4 */
+ U32 txEventStatus; /* 06E0 TX event status */
+#define TOMAL_TX_EVENT 0x00000001
+ U32 reserved16[3]; /* 06E4 */
+ U32 txEventMask; /* 06F0 TX event mask */
+ U32 reserved17[515]; /* 06F4 */
+ U32 rxNotificationCtrl; /* 0F00 RX notification control */
+#define TOMAL_RX_NOTIFY_CTRL_COUNTER_START0 0x00000080
+#define TOMAL_RX_NOTIFY_CTRL_COUNTER_START1 0x00000040
+ U32 reserved18[3]; /* 0F04 */
+ U32 rxMinTimer; /* 0F10 RX minimum timer */
+ U32 reserved19[3]; /* 0F14 */
+ U32 rxMaxTimer; /* 0F20 RX maximum timer */
+ U32 reserved20[63]; /* 0F24 */
+ U32 rxHWCurrentDescriptorAddrH; /* 1020 RX HW current desc. addr. High */
+ U32 reserved21[3]; /* 1024 */
+ U32 rxHWCurrentDescriptorAddrL; /* 1030 RX HW current desc. addr. Low */
+ U32 reserved22[3]; /* 1034 */
+ U32 rxAddFreeBytes; /* 1040 num bytes in RX buffers posted */
+ U32 reserved23[3]; /* 1044 */
+ U32 rxTotalBufferSize; /* 1050 total size of buffers */
+#define TOMAL_RX_TOTAL_BUFFER_SIZE_MAX 0x00100000
+ U32 reserved24[3]; /* 1054 */
+ U32 rxNumberOfReceivedFrames; /* 1060 total frames received */
+ U32 reserved25[3]; /* 1064 */
+ U32 rxDroppedFramesCount; /* 1070 total frames dropped */
+ U32 reserved26[3]; /* 1074 */
+ U32 rxMaxFrameNum; /* 1080 num frames RX to interrupt */
+ U32 reserved27[3]; /* 1084 */
+ U32 rxMinFrameNum; /* 1090 num frames RX to int w/timer */
+ U32 reserved28[3]; /* 1094 */
+ U32 rxEventStatus; /* 10A0 RX status of */
+#define TOMAL_RX_EVENT 0x00000001
+ U32 reserved29[3]; /* 10A4 */
+ U32 rxEventMask; /* 10B0 RX event mask of */
+ U32 reserved30[467]; /* 10B4 */
+ U32 swNonCriticalErrorsStatus; /* 1800 software noncritical error status */
+#define TOMAL_SW_NONCRIT_ERRORS_TPDBC 0x00000010
+#define TOMAL_SW_NONCRIT_ERRORS_RTSDB 0x00000001
+ U32 reserved31[3]; /* 1804 */
+ U32 swNonCriticalErrorsEnable; /* 1810 software noncritical error enable */
+ U32 reserved32[3]; /* 1814 */
+ U32 swNonCriticalErrorsMask; /* 1820 software noncritical error mask */
+ U32 reserved33[55]; /* 1824 */
+ U32 rxDataBufferFreeSpace; /* 1900 number free entries in RX buffer */
+ U32 reserved34[3]; /* 1904 */
+ U32 txDataBuffer0FreeSpace; /* 1910 num free entries in TX buffer */
+ U32 reserved35[3]; /* 1914 */
+ U32 txDataBuffer1FreeSpace; /* 1920 num free entries in TX buffer */
+ U32 reserved36[127]; /* 1924 */
+ U32 rxMACStatus; /* 1B20 status from MAC for RX packets */
+#define TOMAL_RX_MAC_CODE_ERROR 0x00001000 /* XEMAC */
+#define TOMAL_RX_MAC_PARITY_ERROR 0x00000400 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_OVERRUN 0x00000200 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_PAUSE_FRAME 0x00000100 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_BAD_FRAME 0x00000080 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_RUNT_FRAME 0x00000040 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_SHORT_EVENT 0x00000020 /* EMAC4 */
+#define TOMAL_RX_MAC_ALIGN_ERROR 0x00000010 /* EMAC4 */
+#define TOMAL_RX_MAC_BAD_FCS 0x00000008 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_FRAME_TOO_LONG 0x00000004 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_OUT_RANGE_ERROR 0x00000002 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_IN_RANGE_ERROR 0x00000001 /* XEMAC/EMAC4 */
+#define TOMAL_RX_MAC_XEMAC_MASK (TOMAL_RX_MAC_CODE_ERROR | \
+ TOMAL_RX_MAC_PARITY_ERROR | TOMAL_RX_MAC_OVERRUN | \
+ TOMAL_RX_MAC_PAUSE_FRAME | TOMAL_RX_MAC_BAD_FRAME | \
+ TOMAL_RX_MAC_RUNT_FRAME | TOMAL_RX_MAC_BAD_FCS | \
+ TOMAL_RX_MAC_FRAME_TOO_LONG | TOMAL_RX_MAC_OUT_RANGE_ERROR | \
+ TOMAL_RX_MAC_IN_RANGE_ERROR)
+ U32 reserved37[3]; /* 1B24 */
+ U32 rxMACStatusEnable; /* 1B30 enable bits in rxMACStatus */
+ U32 reserved38[3]; /* 1B34 */
+ U32 rxMACStatusMask; /* 1B40 mask bits in rxMACStatus */
+ U32 reserved39[3]; /* 1B44 */
+ U32 txMACStatus; /* 1B50 status from MAC for TX packets */
+#define TOMAL_TX_MAC_LOCAL_FAULT 0x00001000 /* XEMAC */
+#define TOMAL_TX_MAC_REMOTE_FAULT 0x00000800 /* XEMAC */
+#define TOMAL_TX_MAC_BAD_FCS 0x00000200 /* EMAC4 */
+#define TOMAL_TX_MAC_PARITY_ERROR 0x00000100 /* XEMAC */
+#define TOMAL_TX_MAC_LOST_CARRIER 0x00000080 /* EMAC4 */
+#define TOMAL_TX_MAC_EXCESSIVE_DEFERRAL 0x00000040 /* EMAC4 */
+#define TOMAL_TX_MAC_EXCESSIVE_COLLISION 0x00000020 /* EMAC4 */
+#define TOMAL_TX_MAC_LATE_COLLISION 0x00000010 /* EMAC4 */
+#define TOMAL_TX_MAC_UNDERRUN 0x00000002 /* XEMAC/EMAC4 */
+#define TOMAL_TX_MAC_SQE 0x00000001 /* EMAC4 */
+#define TOMAL_TX_MAC_XEMAC_MASK (TOMAL_TX_MAC_LOCAL_FAULT | \
+ TOMAL_TX_MAC_REMOTE_FAULT | TOMAL_TX_MAC_PARITY_ERROR | \
+ TOMAL_TX_MAC_UNDERRUN)
+ U32 reserved40[3]; /* 1B54 */
+ U32 txMACStatusEnable; /* 1B60 enable bits in txMACStatus */
+ U32 reserved41[3]; /* 1B64 */
+ U32 txMACStatusMask; /* 1B70 mask bits in txMACStatus */
+ U32 reserved42[163]; /* 1B74 */
+ U32 hwErrorsStatus; /* 1E00 hardware error status */
+#define TOMAL_HW_ERRORS_IRAPE 0x00000008
+#define TOMAL_HW_ERRORS_ORAPE 0x00000004
+#define TOMAL_HW_ERRORS_IDBPE 0x00000002
+#define TOMAL_HW_ERRORS_ODBPE 0x00000001
+ U32 reserved43[3]; /* 1E04 */
+ U32 hwErrorsEnable; /* 1E10 enable bits in hwErrorsStatus */
+ U32 reserved44[3]; /* 1E14 */
+ U32 hwErrorsMask; /* 1E20 mask bits in hwErrorsStatus */
+ U32 reserved45[55]; /* 1E24 */
+ U32 swCriticalErrorsStatus; /* 1F00 software critical error status */
+#define TOMAL_SW_CRIT_ERRORS_TDBC 0x00000002
+#define TOMAL_SW_CRIT_ERRORS_RDBC 0x00000001
+ U32 reserved46[3]; /* 1F04 */
+ U32 swCriticalErrorsEnable; /* 1F10 enable bits in swCriticalErrorsStatus */
+ U32 reserved47[3]; /* 1F14 */
+ U32 swCriticalErrorsMask; /* 1F20 mask bits in swCriticalErrorsStatus */
+ U32 reserved48[3]; /* 1F24 */
+ U32 rxDescriptorBadCodeFEC; /* 1F30 RX channel w/bad code descriptor */
+ U32 reserved49[3]; /* 1F34 */
+ U32 txDescriptorBadCodeFEC; /* 1F40 TX channel w/bad code descriptor */
+ U32 reserved50[15]; /* 1F44 */
+ U32 interruptStatus; /* 1F80 interrupt status register */
+#define TOMAL_INTERRUPT_TX1 0x00020000
+#define TOMAL_INTERRUPT_TX0 0x00010000
+#define TOMAL_INTERRUPT_RX1 0x00000200
+#define TOMAL_INTERRUPT_RX0 0x00000100
+#define TOMAL_INTERRUPT_TX_MAC_ERROR1 0x00000080
+#define TOMAL_INTERRUPT_TX_MAC_ERROR0 0x00000040
+#define TOMAL_INTERRUPT_RX_MAC_ERROR1 0x00000020
+#define TOMAL_INTERRUPT_RX_MAC_ERROR0 0x00000010
+#define TOMAL_INTERRUPT_PLB_PARITY_ERROR 0x00000008
+#define TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR1 0x00000004
+#define TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0 0x00000002
+#define TOMAL_INTERRUPT_CRITICAL_ERROR 0x00000001
+#define TOMAL_IRQ0_MASK (TOMAL_INTERRUPT_TX0 | TOMAL_INTERRUPT_RX0 | \
+ TOMAL_INTERRUPT_TX_MAC_ERROR0 | TOMAL_INTERRUPT_RX_MAC_ERROR0 | \
+ TOMAL_INTERRUPT_PLB_PARITY_ERROR | TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR0 | \
+ TOMAL_INTERRUPT_CRITICAL_ERROR)
+#define TOMAL_IRQ1_MASK (TOMAL_INTERRUPT_TX1 | TOMAL_INTERRUPT_RX1 | \
+ TOMAL_INTERRUPT_TX_MAC_ERROR1 | TOMAL_INTERRUPT_RX_MAC_ERROR1 | \
+ TOMAL_INTERRUPT_SW_NONCRITICAL_ERROR1)
+ U32 reserved51[3]; /* 1F84 */
+ U32 interruptRoute; /* 1F90 interrupt line routing */
+ U32 reserved52[51]; /* 1F94 */
+ U32 rxMACBadStatusCounter; /* 2060 num frames with errors in MAC */
+ U32 reserved53[999]; /* 2064 */
+ U32 debugVectorsCtrl; /* 3000 */
+ U32 reserved54[3]; /* 3004 */
+ U32 debugVectorsReadData; /* 3010 */
+} TOMALRegs;
+
+typedef volatile struct _RxDesc {
+ U16 code;
+#define TOMAL_RX_DESC_CODE 0x6000
+ U16 postedLength;
+ U16 status;
+#define TOMAL_RX_LAST 0x8000
+#define TOMAL_RX_STATUS_ENCODE_MASK 0x03f0
+#define TOMAL_RX_STATUS_TCP_UDP_CHECKSUM_PASSED 0x0008
+#define TOMAL_RX_STATUS_IP_CHECKSUM_PASSED 0x0004
+#define TOMAL_RX_STATUS_CHECKSUM_VALID 0x0002
+ U16 totalFrameLength;
+ U16 reserved;
+ U16 buffHeadAddrH; /* bits 16-31 of data buffer address */
+ U32 buffHeadAddrL; /* bits 32-63 of data buffer address */
+} RxDesc;
+
+
+typedef volatile struct _TxDesc {
+ U8 code;
+#define TOMAL_TX_DESC_CODE 0x60
+#define TOMAL_TX_SIGNAL 0x04
+#define TOMAL_TX_NOTIFY_REQ 0x02
+#define TOMAL_TX_LAST 0x01
+ U8 command;
+#define TOMAL_TX_ENABLE_HW_CHECKSUM 0x40
+#define TOMAL_TX_GENERATE_FCS 0x20
+#define TOMAL_TX_GENERATE_PAD 0x30 /* GENERATE_FCS must also be set */
+#define TOMAL_TX_INSERT_SOURCE_ADDR 0x08
+#define TOMAL_TX_REPLACE_SOURCE_ADDR 0x04
+#define TOMAL_TX_INSERT_VLAN_TAG 0x02
+#define TOMAL_TX_REPLACE_VLAN_TAG 0x01
+ U16 postedLength;
+ U32 wBStatus;
+#define TOMAL_TX_STATUS_GOOD 0x00010000
+ U16 reserved;
+ U16 buffHeadAddrH; /* bits 16-31 of data buffer address */
+ U32 buffHeadAddrL; /* bits 32-63 of data buffer address */
+} TxDesc;
+
+
+typedef volatile struct _BranchDesc {
+ U64 code;
+#define TOMAL_BRANCH_CODE 0x2000000000000000ULL
+ U16 reserved;
+ U16 nextDescAddrH; /* bits 16-31 of next descriptor address */
+ U32 nextDescAddrL; /* bits 32-63 of next descriptor address (16 byte aligned) */
+} BranchDesc;
+
+
+
+typedef struct _RxDescSegment {
+ RxDesc* desc;
+ RxDesc* currDesc;
+ struct sk_buff** skb;
+ struct sk_buff** currSkb;
+ dma_addr_t dmaHandle;
+ size_t size;
+ BranchDesc* branchDesc;
+ struct _RxDescSegment* next;
+} RxDescSegment;
+
+
+typedef struct _TxDescSegment {
+ TxDesc* desc;
+ U32 oldIndex;
+ U32 freeIndex;
+ struct sk_buff** skb;
+ dma_addr_t dmaHandle;
+ size_t size;
+ BranchDesc* branchDesc;
+ struct _TxDescSegment* next;
+} TxDescSegment;
+
+
+typedef struct _TOMAL {
+ /* Mapping of TOMAL's HW registers. */
+ TOMALRegs* regs[TOMAL_MAX_CHANNELS];
+
+ /* RX buffers, descriptors, and other data. */
+ U32 maxRxBuffers[TOMAL_MAX_CHANNELS];
+ U16 rxBufferSize[TOMAL_MAX_CHANNELS];
+ RxDescSegment* rxDescSegment[TOMAL_MAX_CHANNELS];
+ RxDescSegment* oldRxSegment[TOMAL_MAX_CHANNELS]; /* oldest non-served RX desc segment */
+
+ /* TX descriptors and other data. */
+ U32 maxTxBuffers[TOMAL_MAX_CHANNELS];
+ U32 pendingTxBuffers[TOMAL_MAX_CHANNELS];
+ U32 numberOfTransmittedFrames[TOMAL_MAX_CHANNELS];
+ U32 numberOfReceivedFrames[TOMAL_MAX_CHANNELS];
+ TxDescSegment* txDescSegment[TOMAL_MAX_CHANNELS];
+ TxDescSegment* oldTxSegment[TOMAL_MAX_CHANNELS]; /* oldest non-served TX desc segment */
+ TxDescSegment* freeTxSegment[TOMAL_MAX_CHANNELS]; /* next free TX descriptor segment */
+
+ struct net_device* netDev[TOMAL_MAX_CHANNELS];
+ spinlock_t rxLock[TOMAL_MAX_CHANNELS];
+ spinlock_t txLock[TOMAL_MAX_CHANNELS];
+ struct napi_struct napi[TOMAL_MAX_CHANNELS] ; /* 2.6.27-ism for NAPI poll */
+ int irq0;
+ int irq1;
+ int count_tx_checksum_type[4] ;
+ struct proc_dir_entry* parentDir;
+ struct proc_dir_entry* tomalDir;
+ struct proc_dir_entry* hwDir;
+ struct proc_dir_entry* swDir;
+ U32 numberOfNetrxDrops ;
+ U32 numberOfHwDrops0 ;
+ U32 numberOfHwDrops1 ;
+ U32 numberOfNotLast ;
+
+} TOMAL;
+
+
+
+typedef enum {
+ tomal_ras_none = 0x00,
+ tomal_ras_timeout = 0x01,
+ tomal_ras_alloc_error = 0x02,
+ tomal_ras_spurious_irq = 0x03,
+ tomal_ras_unknown_critical_int = 0x04,
+ tomal_ras_unknown_noncrit_int = 0x05,
+ tomal_ras_ioremap_error = 0x06,
+ tomal_ras_irq_unavailable = 0x07,
+
+ tomal_ras_max = 0xff
+} tomal_ras_id;
+
+
+TOMAL* __init tomal_init(void* devMapAddr,
+ struct net_device* netDev0,
+ U32 rxTotalBufferSize0,
+ U32 numTxBuffers0,
+ struct net_device* netDev1,
+ U32 rxTotalBufferSize1,
+ U32 numTxBuffers1,
+ int irq0,
+ int irq1,
+ struct proc_dir_entry* procDir);
+
+int tomal_xmit_tx_buffer(TOMAL* tomal, U8 channel, struct sk_buff* skb);
+int tomal_alloc_rx_buffers(TOMAL* tomal, U8 channel);
+int tomal_free_rx_buffers(TOMAL* tomal, U8 channel);
+#if defined(CONFIG_BGP_E10000_NAPI)
+int tomal_poll_napi(struct napi_struct * napi, int budget);
+#else
+int tomal_poll(struct net_device *netDev, int budget);
+#endif
+int tomal_process_tx_buffers(TOMAL* tomal, U8 channel, U32 txNumTransmitDesc);
+void tomal_free_rx_segments(TOMAL* tomal, U8 channel);
+void tomal_free_tx_segments(TOMAL* tomal, U8 channel);
+void tomal_free_tx_buffers(TOMAL* tomal, U8 channel);
+int tomal_alloc_rx_segments(TOMAL* tomal, U8 channel, U32 numDescriptors);
+int tomal_alloc_tx_segments(TOMAL* tomal, U8 channel, U32 numDescriptors);
+
+int tomal_soft_reset(TOMAL* tomal);
+int tomal_configure(TOMAL* tomal);
+
+
+/* Turns all RX & TX channels off. */
+static inline void tomal_rx_tx_disable(TOMAL* tomal)
+{
+ U32 ccr = in_be32(&tomal->regs[0]->configurationCtrl);
+
+ ccr &= ~(TOMAL_CFG_CTRL_RX_MAC0 | TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 |
+ TOMAL_CFG_CTRL_TX_MAC1);
+ out_be32(&tomal->regs[0]->configurationCtrl, ccr);
+
+ return;
+}
+
+
+/* Turns all RX & TX channels on. */
+static inline void tomal_rx_tx_enable(TOMAL* tomal)
+{
+ out_be32(&tomal->regs[0]->configurationCtrl, TOMAL_CFG_CTRL_RX_MAC0 |
+ TOMAL_CFG_CTRL_RX_MAC1 | TOMAL_CFG_CTRL_TX_MAC0 | TOMAL_CFG_CTRL_TX_MAC1);
+
+ return;
+}
+
+void tomal_irq_enable(TOMAL* tomal, U8 channel);
+
+
+void tomal_irq_disable(TOMAL* tomal, U8 channel);
+
+
+int tomal_pending_rx_buffers(TOMAL* tomal, U8 channel);
+int tomal_pending_tx_buffers(TOMAL* tomal, U8 channel);
+
+void tomal_exit(TOMAL* tomal);
+
+
+#endif
diff --git a/drivers/net/bgp_network/450_tlb.h b/drivers/net/bgp_network/450_tlb.h
new file mode 100644
index 00000000000000..67f04c963c3b20
--- /dev/null
+++ b/drivers/net/bgp_network/450_tlb.h
@@ -0,0 +1,121 @@
+/* Basic access functions for 'software TLBs' in powerpc 440/450 */
+#ifndef __450_tlb_h__
+#define __450_tlb_h__
+#include <asm/bluegene_ras.h>
+
+static inline int get_tlb_pageid(int tlbindex)
+ {
+ int rc ;
+ /* PPC44x_TLB_PAGEID is 0 */
+ asm volatile( "tlbre %[rc],%[index],0"
+ : [rc] "=r" (rc)
+ : [index] "r" (tlbindex)
+ ) ;
+ return rc ;
+ }
+
+static inline int get_tlb_xlat(int tlbindex)
+ {
+ int rc ;
+ /* PPC44x_TLB_XLAT is 1 */
+ asm volatile( "tlbre %[rc],%[index],1"
+ : [rc] "=r" (rc)
+ : [index] "r" (tlbindex)
+ ) ;
+ return rc ;
+ }
+
+static inline int get_tlb_attrib(int tlbindex)
+ {
+ int rc ;
+ /* PPC44x_TLB_ATTRIB is 2 */
+ asm volatile( "tlbre %[rc],%[index],2"
+ : [rc] "=r" (rc)
+ : [index] "r" (tlbindex)
+ ) ;
+ return rc ;
+ }
+
+static inline int search_tlb(unsigned int vaddr)
+ {
+ int rc ;
+ /* tlbsx: search the TLB for the entry translating vaddr */
+ asm volatile( "tlbsx %[rc],0,%[vaddr]"
+ : [rc] "=r" (rc)
+ : [vaddr] "r" (vaddr)
+ ) ;
+ return rc ;
+ }
+
+//static inline int search_tlb_validity(unsigned int vaddr)
+//{
+// int validity ;
+// asm volatile( "tlbsx. %[validity],0,%[vaddr]" "\n"
+// "mfcr %[validity]"
+// :
+// [validity] "=r" (validity)
+// : [vaddr] "r" (vaddr)
+// : "cc"
+// ) ;
+// return validity ;
+//}
+
+
+static inline int search_tlb_v(unsigned int vaddr)
+ {
+ int rc ;
+ int tlbindex ;
+ int validity ;
+ /* tlbsx.: search the TLB for vaddr; match status is recorded in CR0 */
+ asm volatile( "tlbsx. %[tlbindex],0,%[vaddr]" "\n"
+ "mfcr %[validity]"
+ : [tlbindex] "=r" (tlbindex),
+ [validity] "=r" (validity)
+ : [vaddr] "r" (vaddr)
+ : "cc"
+ ) ;
+// tlbindex = search_tlb(vaddr) ;
+// validity=search_tlb_validity(vaddr) ;
+ rc = (validity & 0x20000000) | (tlbindex & 0xefffffff) ; // Hi bit for 'found', other bits (bottom 6, really) for index
+// TRACEN(k_t_request,"vaddr=0x%08x tlbindex=0x%08x validity=0x%08x rc=0x%08x",vaddr,tlbindex,validity,rc) ;
+ return rc ;
+ }
+
+#define TLB0_EPN_1K(a) ((a)&0xFFFFFC00) /* EA[ 0:21] */
+#define TLB0_V _BN(22) /* Valid Bit */
+#define TLB0_TS _BN(23) /* Translation Address Space */
+#define TLB0_SIZE(x) _B4(27,x) /* Page Size */
+#define TLB1_ERPN(e) _B4(31,e) /* Extended RPN: 4 MSb's of 36b Physical Address */
+#define TLB1_RPN_1K(p) ((p)&0xFFFFFC00) /* RPN[ 0:21] */
+
+#define TLB2_FAR _BN(10) /* Fixed Address Region */
+#define TLB2_WL1 _BN(11) /* Write-Thru L1 (when CCR1[L2COBE]=1) */
+#define TLB2_IL1I _BN(12) /* Inhibit L1-I caching (when CCR1[L2COBE]=1) */
+#define TLB2_IL1D _BN(13) /* Inhibit L1-D caching (when CCR1[L2COBE]=1) */
+#define TLB2_IL2I _BN(14) /* see below (on normal C450: Inhibit L2-I caching (when CCR1[L2COBE]=1) */
+#define TLB2_IL2D _BN(15) /* see below (on normal C450: Inhibit L2-D caching (when CCR1[L2COBE]=1) */
+#define TLB2_U0 _BN(16) /* see below (undefined/available on normal C450 */
+#define TLB2_U1 _BN(17) /* User 1: L1 Transient Enable */
+#define TLB2_U2 _BN(18) /* User 2: L1 Store WithOut Allocate */
+#define TLB2_U3 _BN(19) /* see below (on normal C450: User 3: L3 Prefetch Inhibit (0=Enabled, 1=Inhibited) */
+#define TLB2_W _BN(20) /* Write-Thru=1, Write-Back=0 */
+#define TLB2_I _BN(21) /* Cache-Inhibited=1, Cacheable=0 */
+#define TLB2_M _BN(22) /* Memory Coherence Required */
+#define TLB2_G _BN(23) /* Guarded */
+#define TLB2_E _BN(24) /* Endian: 0=Big, 1=Little */
+#define TLB2_UX _BN(26) /* User Execute Enable */
+#define TLB2_UW _BN(27) /* User Write Enable */
+#define TLB2_UR _BN(28) /* User Read Enable */
+#define TLB2_SX _BN(29) /* Supervisor Execute Enable */
+#define TLB2_SW _BN(30) /* Supervisor Write Enable */
+#define TLB2_SR _BN(31) /* Supervisor Read Enable */
+
+/* BGP Specific controls */
+#define TLB2_IL3I (TLB2_IL2I) /* L3 Inhibit for Instruction Fetches */
+#define TLB2_IL3D (TLB2_IL2D) /* L3 Inhibit for Data Accesses */
+#define TLB2_IL2 (TLB2_U0) /* U0 is L2 Prefetch Inhibit */
+#define TLB2_T (TLB2_U1) /* U1 Transient Enabled is supported. */
+#define TLB2_SWOA (TLB2_U2) /* U2 Store WithOut Allocate is supported. */
+#define TLB2_L2_PF_OPT (TLB2_U3) /* U3 is L2 Optimistic Prefetch ("Automatic" when 0) */
+
+#endif
diff --git a/drivers/net/bgp_network/bgdiagnose.h b/drivers/net/bgp_network/bgdiagnose.h
new file mode 100644
index 00000000000000..be205219da263b
--- /dev/null
+++ b/drivers/net/bgp_network/bgdiagnose.h
@@ -0,0 +1,183 @@
+/*
+ * bgdiagnose.h
+ *
+ * Diagnostic routines for 450/BGP bringup
+ *
+ */
+#ifndef __DRIVERS__NET__BLUEGENE__BGDIAGNOSE_H__
+#define __DRIVERS__NET__BLUEGENE__BGDIAGNOSE_H__
+/* #include <asm/bluegene.h> */
+
+#include <linux/kernel.h>
+/* #include <asm/bgp_personality.h> */
+#include <asm/bluegene_ras.h>
+#include "450_tlb.h"
+
+/* static BGP_Personality_t* bgp_personality ; */
+
+/* static void show_personality_kernel(BGP_Personality_Kernel_t * Kernel_Config) */
+/* { */
+/* printk(KERN_INFO "show_personality_kernel L1Config=0x%08x L2Config=0x%08x L3Config=0x%08x L3Select=0x%08x FreqMHz=%d NodeConfig=0x%08x\n", */
+/* Kernel_Config->L1Config, */
+/* Kernel_Config->L2Config, */
+/* Kernel_Config->L3Config, */
+/* Kernel_Config->L3Select, */
+/* Kernel_Config->FreqMHz, */
+/* Kernel_Config->NodeConfig) ; */
+/* */
+/* } */
+/* static void show_personality(void) */
+/* { */
+/* // bgp_personality = bgcns()->getPersonalityData(); */
+/* // show_personality_kernel(&bgp_personality->Kernel_Config) ; */
+/* } */
+
+static const char* TLB_SIZES[] = {
+ " 1K", /* 0 */
+ " 4K",
+ " 16K",
+ " 64K",
+ "256K",
+ " 1M",
+ "?-6?",
+ " 16M",
+ "?-8?",
+ "256M",
+ " 1G",
+ "?11?",
+ "?12?",
+ "?13?",
+ "?14?",
+ "?15?"
+};
+
+#include "450_tlb.h"
+
+
+static void show_tlbs(unsigned int vaddr) __attribute__ ((unused)) ;
+static void show_tlbs(unsigned int vaddr) {
+
+ int i;
+ uint32_t t0, t1, t2;
+ int tlb_index = search_tlb(vaddr) ;
+ for (i = 0; i < 64; i++) {
+ t0 = get_tlb_pageid(i) ;
+ t1 = get_tlb_xlat(i) ;
+ t2 = get_tlb_attrib(i) ;
+/* _bgp_mftlb(i,t0,t1,t2); */
+/* if (t0 & TLB0_V) { */
+ {
+ printk(KERN_INFO
+ "TLB 0x%02x %08x-%08x-%08x EPN=%08x RPN=%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d\n",
+ i,
+ t0, t1, t2,
+ TLB0_EPN_1K(t0),
+ TLB1_ERPN(t1),TLB1_RPN_1K(t1),
+ TLB_SIZES[(t0 & 0xF0) >> 4],
+ (t2 & TLB2_W) ? 1 : 0,
+ (t2 & TLB2_I) ? 1 : 0,
+ (t2 & TLB2_M) ? 1 : 0,
+ (t2 & TLB2_G) ? 1 : 0,
+ (t2 & TLB2_U0) ? 1 : 0,
+ (t2 & TLB2_U1) ? 1 : 0,
+ (t2 & TLB2_U2) ? 1 : 0,
+ (t2 & TLB2_U3) ? 1 : 0,
+ (t0 & TLB0_V) ? 1 : 0
+ );
+ }
+ }
+ printk(KERN_INFO "vaddr=0x%08x tlb_index=%d\n", vaddr,tlb_index) ;
+}
+
+static void show_tlb_for_vaddr(unsigned int vaddr) __attribute__ ((unused)) ;
+static void show_tlb_for_vaddr(unsigned int vaddr)
+{
+ int i = search_tlb(vaddr) & 0x3f ;
+ uint32_t t0 = get_tlb_pageid(i) ;
+ uint32_t t1 = get_tlb_xlat(i) ;
+ uint32_t t2 = get_tlb_attrib(i) ;
+ printk(KERN_INFO
+ "TLB 0x%02x %08x-%08x-%08x EPN=%08x RPN=%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d\n",
+ i,
+ t0, t1, t2,
+ TLB0_EPN_1K(t0),
+ TLB1_ERPN(t1),TLB1_RPN_1K(t1),
+ TLB_SIZES[(t0 & 0xF0) >> 4],
+ (t2 & TLB2_W) ? 1 : 0,
+ (t2 & TLB2_I) ? 1 : 0,
+ (t2 & TLB2_M) ? 1 : 0,
+ (t2 & TLB2_G) ? 1 : 0,
+ (t2 & TLB2_U0) ? 1 : 0,
+ (t2 & TLB2_U1) ? 1 : 0,
+ (t2 & TLB2_U2) ? 1 : 0,
+ (t2 & TLB2_U3) ? 1 : 0,
+ (t0 & TLB0_V) ? 1 : 0
+ );
+
+}
+static inline unsigned int move_from_spr(unsigned int sprNum)
+ {
+ unsigned long sprVal = 0;
+
+ asm volatile ("mfspr %0,%1\n" : "=r"(sprVal) : "i" (sprNum));
+
+ return sprVal;
+
+ }
+static inline void show_spr(unsigned int spr, const char *name)
+ {
+ printk(KERN_INFO "%s[%03x] = 0x%08x\n",name,spr, move_from_spr(spr)) ;
+ }
+
+static inline unsigned int move_from_dcr(unsigned int dcrNum)
+{
+ unsigned long dcrVal = 0;
+
+ asm volatile("mfdcrx %0,%1": "=r" (dcrVal) : "r" (dcrNum) : "memory");
+
+ return dcrVal;
+}
+
+static inline unsigned int move_from_msr(void)
+{
+ unsigned long msrVal = 0;
+
+ asm volatile("mfmsr %0" : "=r" (msrVal) : : "memory");
+
+ return msrVal;
+}
+
+static inline void show_msr(void)
+ {
+ printk(KERN_INFO "MSR = 0x%08x\n",move_from_msr()) ;
+ }
+
+static void show_dcr_range(unsigned int start, unsigned int length) __attribute__ ((unused)) ;
+static void show_dcr_range(unsigned int start, unsigned int length)
+ {
+ unsigned int x ;
+ for( x=0;x<length;x+=8 )
+ {
+ unsigned int dcrx=start+x ;
+ printk(KERN_INFO "dcr[%04x]=[%08x %08x %08x %08x %08x %08x %08x %08x]\n",
+ start+x,
+ move_from_dcr(dcrx),move_from_dcr(dcrx+1),move_from_dcr(dcrx+2),move_from_dcr(dcrx+3),
+ move_from_dcr(dcrx+4),move_from_dcr(dcrx+5),move_from_dcr(dcrx+6),move_from_dcr(dcrx+7)
+ ) ;
+ }
+ }
+static void show_sprs(void) __attribute__ ((unused)) ;
+static void show_sprs(void)
+{
+ show_msr() ;
+ show_spr(0x3b3,"CCR0") ;
+ show_spr(0x378,"CCR1") ;
+ show_spr(0x3b2,"MMUCR") ;
+ show_spr(0x39b,"RSTCFG") ;
+/* show_dcr_range(0x500,32) ; // _BGP_DCR_L30 */
+/* show_dcr_range(0x540,32) ; // _BGP_DCR_L31 */
+/* show_dcr_range(0xd00,16) ; // _BGP_DCR_DMA */
+
+ }
+
+#endif
diff --git a/drivers/net/bgp_network/bgp_net_traceflags.h b/drivers/net/bgp_network/bgp_net_traceflags.h
new file mode 100644
index 00000000000000..1a148f2064c3a5
--- /dev/null
+++ b/drivers/net/bgp_network/bgp_net_traceflags.h
@@ -0,0 +1,56 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for collective and torus
+ *
+ *
+ ********************************************************************/
+#ifndef __BGP_NET_TRACEFLAGS_H__
+#define __BGP_NET_TRACEFLAGS_H__
+
+enum {
+ k_t_general = 0x01 ,
+ k_t_lowvol = 0x02 ,
+ k_t_irqflow = 0x04 ,
+ k_t_irqflow_rcv = 0x08 ,
+ k_t_protocol = 0x10 ,
+ k_t_detail = 0x20 ,
+ k_t_fifocontents = 0x40 ,
+ k_t_toruspkt = 0x80 ,
+ k_t_bgcolpkt = 0x80 ,
+ k_t_init = 0x100 ,
+ k_t_request = 0x200 ,
+ k_t_error = 0x400 ,
+ k_t_sync = 0x800 ,
+ k_t_api = 0x1000 ,
+ k_t_diagnosis = 0x2000 ,
+ k_t_congestion = 0x4000 ,
+ k_t_startxmit = 0x8000 ,
+ k_t_napi = 0x10000 ,
+ k_t_scattergather = 0x20000 ,
+ k_t_flowcontrol = 0x40000 ,
+ k_t_entryexit = 0x80000 ,
+ k_t_dmacopy = 0x100000 ,
+ k_t_fpucopy = 0x200000 ,
+ k_t_sgdiag = 0x400000 ,
+ k_t_sgdiag_detail = 0x800000 ,
+ k_t_inject_detail = 0x1000000 ,
+};
+
+#endif
diff --git a/drivers/net/bgp_statistics/Makefile b/drivers/net/bgp_statistics/Makefile
new file mode 100644
index 00000000000000..666c9b9cdd631b
--- /dev/null
+++ b/drivers/net/bgp_statistics/Makefile
@@ -0,0 +1,4 @@
+# Makefile for BlueGene collective and torus driver
+
+
+obj-$(CONFIG_BGP_STATISTICS) += bgp_stats.o
diff --git a/drivers/net/bgp_statistics/bgp_stats.c b/drivers/net/bgp_statistics/bgp_stats.c
new file mode 100644
index 00000000000000..e180cb500a238e
--- /dev/null
+++ b/drivers/net/bgp_statistics/bgp_stats.c
@@ -0,0 +1,258 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ *
+ * Description: Statistic collection for Blue Gene low-level driver for sockets over torus
+ *
+ *
+ ********************************************************************/
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/bootmem.h>
+
+#include <linux/alignment_histograms.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+
+
+static int bgp_statistics_init (void);
+static void bgp_statistics_cleanup (void);
+
+module_init(bgp_statistics_init);
+module_exit(bgp_statistics_cleanup);
+
+
+MODULE_DESCRIPTION("BG/P statistics driver");
+MODULE_LICENSE("GPL");
+
+#ifndef CTL_UNNUMBERED
+#define CTL_UNNUMBERED -2
+#endif
+
+/* Parameters, statistics, and debugging */
+#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM)
+struct alignment_histogram al_histogram ;
+#endif
+
+static struct ctl_path bgp_statistics_ctl_path[] = {
+ { .procname = "bgp", .ctl_name = 0, },
+ { .procname = "statistics", .ctl_name = 0, },
+/* { .procname = "torus", .ctl_name = 0, }, */
+ { },
+};
+
+#define CTL_PARAM_EXT(Name,Var) \
+ { \
+ .ctl_name = CTL_UNNUMBERED, \
+ .procname = Name , \
+ .data = &(Var), \
+ .maxlen = sizeof(int), \
+ .mode = 0644, \
+ .proc_handler = &proc_dointvec \
+ }
+
+#define CTL_PARAM_EXT_LL(Name,Var) \
+ { \
+ .ctl_name = CTL_UNNUMBERED, \
+ .procname = Name , \
+ .data = &(Var), \
+ .maxlen = 2*sizeof(int), \
+ .mode = 0644, \
+ .proc_handler = &proc_dointvec \
+ }
+
+
+struct ctl_table bgp_statistics_table[] = {
+#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM)
+ CTL_PARAM_EXT("ah_min",al_histogram.min_size_of_interest) ,
+ CTL_PARAM_EXT("sah0",AL_HISTOGRAM(src_alignment_histogram_crc,0)) ,
+ CTL_PARAM_EXT("sah1",AL_HISTOGRAM(src_alignment_histogram_crc,1)) ,
+ CTL_PARAM_EXT("sah2",AL_HISTOGRAM(src_alignment_histogram_crc,2)) ,
+ CTL_PARAM_EXT("sah3",AL_HISTOGRAM(src_alignment_histogram_crc,3)) ,
+ CTL_PARAM_EXT("sah4",AL_HISTOGRAM(src_alignment_histogram_crc,4)) ,
+ CTL_PARAM_EXT("sah5",AL_HISTOGRAM(src_alignment_histogram_crc,5)) ,
+ CTL_PARAM_EXT("sah6",AL_HISTOGRAM(src_alignment_histogram_crc,6)) ,
+ CTL_PARAM_EXT("sah7",AL_HISTOGRAM(src_alignment_histogram_crc,7)) ,
+ CTL_PARAM_EXT("sah8",AL_HISTOGRAM(src_alignment_histogram_crc,8)) ,
+ CTL_PARAM_EXT("sah9",AL_HISTOGRAM(src_alignment_histogram_crc,9)) ,
+ CTL_PARAM_EXT("saha",AL_HISTOGRAM(src_alignment_histogram_crc,10)) ,
+ CTL_PARAM_EXT("sahb",AL_HISTOGRAM(src_alignment_histogram_crc,11)) ,
+ CTL_PARAM_EXT("sahc",AL_HISTOGRAM(src_alignment_histogram_crc,12)) ,
+ CTL_PARAM_EXT("sahd",AL_HISTOGRAM(src_alignment_histogram_crc,13)) ,
+ CTL_PARAM_EXT("sahe",AL_HISTOGRAM(src_alignment_histogram_crc,14)) ,
+ CTL_PARAM_EXT("sahf",AL_HISTOGRAM(src_alignment_histogram_crc,15)) ,
+ CTL_PARAM_EXT("dah0",AL_HISTOGRAM(dst_alignment_histogram_crc,0)) ,
+ CTL_PARAM_EXT("dah1",AL_HISTOGRAM(dst_alignment_histogram_crc,1)) ,
+ CTL_PARAM_EXT("dah2",AL_HISTOGRAM(dst_alignment_histogram_crc,2)) ,
+ CTL_PARAM_EXT("dah3",AL_HISTOGRAM(dst_alignment_histogram_crc,3)) ,
+ CTL_PARAM_EXT("dah4",AL_HISTOGRAM(dst_alignment_histogram_crc,4)) ,
+ CTL_PARAM_EXT("dah5",AL_HISTOGRAM(dst_alignment_histogram_crc,5)) ,
+ CTL_PARAM_EXT("dah6",AL_HISTOGRAM(dst_alignment_histogram_crc,6)) ,
+ CTL_PARAM_EXT("dah7",AL_HISTOGRAM(dst_alignment_histogram_crc,7)) ,
+ CTL_PARAM_EXT("dah8",AL_HISTOGRAM(dst_alignment_histogram_crc,8)) ,
+ CTL_PARAM_EXT("dah9",AL_HISTOGRAM(dst_alignment_histogram_crc,9)) ,
+ CTL_PARAM_EXT("daha",AL_HISTOGRAM(dst_alignment_histogram_crc,10)) ,
+ CTL_PARAM_EXT("dahb",AL_HISTOGRAM(dst_alignment_histogram_crc,11)) ,
+ CTL_PARAM_EXT("dahc",AL_HISTOGRAM(dst_alignment_histogram_crc,12)) ,
+ CTL_PARAM_EXT("dahd",AL_HISTOGRAM(dst_alignment_histogram_crc,13)) ,
+ CTL_PARAM_EXT("dahe",AL_HISTOGRAM(dst_alignment_histogram_crc,14)) ,
+ CTL_PARAM_EXT("dahf",AL_HISTOGRAM(dst_alignment_histogram_crc,15)) ,
+ CTL_PARAM_EXT("rah0",AL_HISTOGRAM(rel_alignment_histogram_crc,0)) ,
+ CTL_PARAM_EXT("rah1",AL_HISTOGRAM(rel_alignment_histogram_crc,1)) ,
+ CTL_PARAM_EXT("rah2",AL_HISTOGRAM(rel_alignment_histogram_crc,2)) ,
+ CTL_PARAM_EXT("rah3",AL_HISTOGRAM(rel_alignment_histogram_crc,3)) ,
+ CTL_PARAM_EXT("rah4",AL_HISTOGRAM(rel_alignment_histogram_crc,4)) ,
+ CTL_PARAM_EXT("rah5",AL_HISTOGRAM(rel_alignment_histogram_crc,5)) ,
+ CTL_PARAM_EXT("rah6",AL_HISTOGRAM(rel_alignment_histogram_crc,6)) ,
+ CTL_PARAM_EXT("rah7",AL_HISTOGRAM(rel_alignment_histogram_crc,7)) ,
+ CTL_PARAM_EXT("rah8",AL_HISTOGRAM(rel_alignment_histogram_crc,8)) ,
+ CTL_PARAM_EXT("rah9",AL_HISTOGRAM(rel_alignment_histogram_crc,9)) ,
+ CTL_PARAM_EXT("raha",AL_HISTOGRAM(rel_alignment_histogram_crc,10)) ,
+ CTL_PARAM_EXT("rahb",AL_HISTOGRAM(rel_alignment_histogram_crc,11)) ,
+ CTL_PARAM_EXT("rahc",AL_HISTOGRAM(rel_alignment_histogram_crc,12)) ,
+ CTL_PARAM_EXT("rahd",AL_HISTOGRAM(rel_alignment_histogram_crc,13)) ,
+ CTL_PARAM_EXT("rahe",AL_HISTOGRAM(rel_alignment_histogram_crc,14)) ,
+ CTL_PARAM_EXT("rahf",AL_HISTOGRAM(rel_alignment_histogram_crc,15)) ,
+ CTL_PARAM_EXT("scah0",AL_HISTOGRAM(src_alignment_histogram_copy,0)) ,
+ CTL_PARAM_EXT("scah1",AL_HISTOGRAM(src_alignment_histogram_copy,1)) ,
+ CTL_PARAM_EXT("scah2",AL_HISTOGRAM(src_alignment_histogram_copy,2)) ,
+ CTL_PARAM_EXT("scah3",AL_HISTOGRAM(src_alignment_histogram_copy,3)) ,
+ CTL_PARAM_EXT("scah4",AL_HISTOGRAM(src_alignment_histogram_copy,4)) ,
+ CTL_PARAM_EXT("scah5",AL_HISTOGRAM(src_alignment_histogram_copy,5)) ,
+ CTL_PARAM_EXT("scah6",AL_HISTOGRAM(src_alignment_histogram_copy,6)) ,
+ CTL_PARAM_EXT("scah7",AL_HISTOGRAM(src_alignment_histogram_copy,7)) ,
+ CTL_PARAM_EXT("scah8",AL_HISTOGRAM(src_alignment_histogram_copy,8)) ,
+ CTL_PARAM_EXT("scah9",AL_HISTOGRAM(src_alignment_histogram_copy,9)) ,
+ CTL_PARAM_EXT("scaha",AL_HISTOGRAM(src_alignment_histogram_copy,10)) ,
+ CTL_PARAM_EXT("scahb",AL_HISTOGRAM(src_alignment_histogram_copy,11)) ,
+ CTL_PARAM_EXT("scahc",AL_HISTOGRAM(src_alignment_histogram_copy,12)) ,
+ CTL_PARAM_EXT("scahd",AL_HISTOGRAM(src_alignment_histogram_copy,13)) ,
+ CTL_PARAM_EXT("scahe",AL_HISTOGRAM(src_alignment_histogram_copy,14)) ,
+ CTL_PARAM_EXT("scahf",AL_HISTOGRAM(src_alignment_histogram_copy,15)) ,
+ CTL_PARAM_EXT("dcah0",AL_HISTOGRAM(dst_alignment_histogram_copy,0)) ,
+ CTL_PARAM_EXT("dcah1",AL_HISTOGRAM(dst_alignment_histogram_copy,1)) ,
+ CTL_PARAM_EXT("dcah2",AL_HISTOGRAM(dst_alignment_histogram_copy,2)) ,
+ CTL_PARAM_EXT("dcah3",AL_HISTOGRAM(dst_alignment_histogram_copy,3)) ,
+ CTL_PARAM_EXT("dcah4",AL_HISTOGRAM(dst_alignment_histogram_copy,4)) ,
+ CTL_PARAM_EXT("dcah5",AL_HISTOGRAM(dst_alignment_histogram_copy,5)) ,
+ CTL_PARAM_EXT("dcah6",AL_HISTOGRAM(dst_alignment_histogram_copy,6)) ,
+ CTL_PARAM_EXT("dcah7",AL_HISTOGRAM(dst_alignment_histogram_copy,7)) ,
+ CTL_PARAM_EXT("dcah8",AL_HISTOGRAM(dst_alignment_histogram_copy,8)) ,
+ CTL_PARAM_EXT("dcah9",AL_HISTOGRAM(dst_alignment_histogram_copy,9)) ,
+ CTL_PARAM_EXT("dcaha",AL_HISTOGRAM(dst_alignment_histogram_copy,10)) ,
+ CTL_PARAM_EXT("dcahb",AL_HISTOGRAM(dst_alignment_histogram_copy,11)) ,
+ CTL_PARAM_EXT("dcahc",AL_HISTOGRAM(dst_alignment_histogram_copy,12)) ,
+ CTL_PARAM_EXT("dcahd",AL_HISTOGRAM(dst_alignment_histogram_copy,13)) ,
+ CTL_PARAM_EXT("dcahe",AL_HISTOGRAM(dst_alignment_histogram_copy,14)) ,
+ CTL_PARAM_EXT("dcahf",AL_HISTOGRAM(dst_alignment_histogram_copy,15)) ,
+ CTL_PARAM_EXT("rcah0",AL_HISTOGRAM(rel_alignment_histogram_copy,0)) ,
+ CTL_PARAM_EXT("rcah1",AL_HISTOGRAM(rel_alignment_histogram_copy,1)) ,
+ CTL_PARAM_EXT("rcah2",AL_HISTOGRAM(rel_alignment_histogram_copy,2)) ,
+ CTL_PARAM_EXT("rcah3",AL_HISTOGRAM(rel_alignment_histogram_copy,3)) ,
+ CTL_PARAM_EXT("rcah4",AL_HISTOGRAM(rel_alignment_histogram_copy,4)) ,
+ CTL_PARAM_EXT("rcah5",AL_HISTOGRAM(rel_alignment_histogram_copy,5)) ,
+ CTL_PARAM_EXT("rcah6",AL_HISTOGRAM(rel_alignment_histogram_copy,6)) ,
+ CTL_PARAM_EXT("rcah7",AL_HISTOGRAM(rel_alignment_histogram_copy,7)) ,
+ CTL_PARAM_EXT("rcah8",AL_HISTOGRAM(rel_alignment_histogram_copy,8)) ,
+ CTL_PARAM_EXT("rcah9",AL_HISTOGRAM(rel_alignment_histogram_copy,9)) ,
+ CTL_PARAM_EXT("rcaha",AL_HISTOGRAM(rel_alignment_histogram_copy,10)) ,
+ CTL_PARAM_EXT("rcahb",AL_HISTOGRAM(rel_alignment_histogram_copy,11)) ,
+ CTL_PARAM_EXT("rcahc",AL_HISTOGRAM(rel_alignment_histogram_copy,12)) ,
+ CTL_PARAM_EXT("rcahd",AL_HISTOGRAM(rel_alignment_histogram_copy,13)) ,
+ CTL_PARAM_EXT("rcahe",AL_HISTOGRAM(rel_alignment_histogram_copy,14)) ,
+ CTL_PARAM_EXT("rcahf",AL_HISTOGRAM(rel_alignment_histogram_copy,15)) ,
+ CTL_PARAM_EXT("tagh0",AL_HISTOGRAM(tagged,0)) ,
+ CTL_PARAM_EXT("tagh1",AL_HISTOGRAM(tagged,1)) ,
+ CTL_PARAM_EXT("tagh2",AL_HISTOGRAM(tagged,2)) ,
+ CTL_PARAM_EXT("tagh3",AL_HISTOGRAM(tagged,3)) ,
+ CTL_PARAM_EXT("tagh4",AL_HISTOGRAM(tagged,4)) ,
+ CTL_PARAM_EXT("tagh5",AL_HISTOGRAM(tagged,5)) ,
+ CTL_PARAM_EXT("tagh6",AL_HISTOGRAM(tagged,6)) ,
+ CTL_PARAM_EXT("tagh7",AL_HISTOGRAM(tagged,7)) ,
+ CTL_PARAM_EXT("tagh8",AL_HISTOGRAM(tagged,8)) ,
+ CTL_PARAM_EXT("tagh9",AL_HISTOGRAM(tagged,9)) ,
+ CTL_PARAM_EXT("tagha",AL_HISTOGRAM(tagged,10)) ,
+ CTL_PARAM_EXT("taghb",AL_HISTOGRAM(tagged,11)) ,
+ CTL_PARAM_EXT("taghc",AL_HISTOGRAM(tagged,12)) ,
+ CTL_PARAM_EXT("taghd",AL_HISTOGRAM(tagged,13)) ,
+ CTL_PARAM_EXT("taghe",AL_HISTOGRAM(tagged,14)) ,
+ CTL_PARAM_EXT("taghf",AL_HISTOGRAM(tagged,15)) ,
+ CTL_PARAM_EXT_LL("qcopy",al_histogram.qcopybytes) ,
+ CTL_PARAM_EXT_LL("copy",al_histogram.copybytes) ,
+ CTL_PARAM_EXT_LL("copyshort",al_histogram.copybytesshort) ,
+ CTL_PARAM_EXT_LL("copymisalign",al_histogram.copybytesmisalign) ,
+ CTL_PARAM_EXT_LL("copybroke",al_histogram.copybytesbroke) ,
+ CTL_PARAM_EXT_LL("crcb",al_histogram.crcbytes) ,
+ CTL_PARAM_EXT_LL("csumpartial",al_histogram.csumpartialbytes) ,
+#endif
+ { 0 },
+};
+
+
+
+static void register_statistics_sysctl(void)
+{
+ register_sysctl_paths(bgp_statistics_ctl_path,bgp_statistics_table) ;
+}
+static int bgp_statistics_init(void)
+ {
+ register_statistics_sysctl() ;
+ return 0 ;
+ }
+
+static void bgp_statistics_cleanup (void)
+{
+
+}
diff --git a/drivers/net/bgp_torus/Makefile b/drivers/net/bgp_torus/Makefile
new file mode 100644
index 00000000000000..4ed8b2021bb3f3
--- /dev/null
+++ b/drivers/net/bgp_torus/Makefile
@@ -0,0 +1,8 @@
+# Makefile for BlueGene collective and torus driver
+
+EXTRA_CFLAGS += -I$(BGPHOME)/bgp/arch/include -Iarch/powerpc/syslib/bgdd/ -Iarch/ppc/syslib/bgdd/ -g -dA -D__LINUX_KERNEL__
+
+bgp_torus-y := bgp_fpu_memcpy.o bgp_dma_tcp_frames.o bgp_dma_tcp.o bgtornic.o torus.o bgp_dma_tcp_diagnose.o bgp_dma_ioctl.o
+bgp_torus-$(CONFIG_BLUEGENE_DMA_MEMCPY) += bgp_dma_memcpy.o
+
+obj-$(CONFIG_BGP_TORUS) += bgp_torus.o
diff --git a/drivers/net/bgp_torus/bgp_bic_diagnosis.h b/drivers/net/bgp_torus/bgp_bic_diagnosis.h
new file mode 100644
index 00000000000000..4ac45edfba4474
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_bic_diagnosis.h
@@ -0,0 +1,75 @@
+/* These are defined by the hardware. */
+#define NR_BIC_GROUPS 15
+#define NR_BIC_GINTS 32
+#define NR_BIC_CPUS 4
+
+/* 4-bit target value for target register */
+#define BIC_TARGET_MASK (0xf)
+#define BIC_TARGET_TYPE_NORMAL (1<<2)
+#define BIC_TARGET_NORMAL(cpu) (BIC_TARGET_TYPE_NORMAL|(cpu))
+#define BIC_DEFAULT_CPU 0
+
+/* Define the layout of each group's registers.
+ * This layout should be 0x80 bytes long (including pad).
+ */
+struct bic_group_regs {
+ uint32_t status; /* 0x00 RW */
+ uint32_t rd_clr_status; /* 0x04 RO */
+ uint32_t status_clr; /* 0x08 WO */
+ uint32_t status_set; /* 0x0c WO */
+ uint32_t target[4]; /* 0x10 RW */
+ uint32_t normal[NR_BIC_CPUS]; /* 0x20 RW */
+ uint32_t critical[NR_BIC_CPUS]; /* 0x30 RW */
+ uint32_t mcheck[NR_BIC_CPUS]; /* 0x40 RW */
+ uint32_t _pad[12]; /* 0x50 */
+};
+
+/* Define the layout of the interrupt controller mem mapped regs. */
+struct bic_regs {
+ struct bic_group_regs group[NR_BIC_GROUPS]; /* 0x000 */
+ uint32_t hier_normal[NR_BIC_CPUS]; /* 0x780 */
+ uint32_t hier_critical[NR_BIC_CPUS]; /* 0x790 */
+ uint32_t hier_mcheck[NR_BIC_CPUS]; /* 0x7a0 */
+};
+
+struct bic {
+ spinlock_t mask_lock; /* could be finer grained if necessary */
+ struct bic_regs *regs;
+} ;
+
+extern volatile struct bic bic;
+
+/* void show_bic_regs(void) ; // diagnostic 'printk' of the BIC */
+static void show_bic_group(int g, volatile struct bic_group_regs* gp) __attribute__ ((unused)) ;
+static void show_bic_group(int g, volatile struct bic_group_regs* gp)
+{
+ printk(KERN_NOTICE "bic_group_regs[%d] status=%08x target=[%08x %08x %08x %08x]\n",g,gp->status, gp->target[0], gp->target[1], gp->target[2], gp->target[3]) ;
+ printk(KERN_NOTICE "bic_group_regs[%d] normal=[%08x %08x %08x %08x] critical=[%08x %08x %08x %08x] mcheck=[%08x %08x %08x %08x]\n",g, gp->normal[0], gp->normal[1], gp->normal[2], gp->normal[3], gp->critical[0],gp->critical[1],gp->critical[2],gp->critical[3],gp->mcheck[0],gp->mcheck[1],gp->mcheck[2],gp->mcheck[3]) ;
+}
+
+static void show_bic_regs(void) __attribute__ ((unused)) ;
+static void show_bic_regs(void)
+{
+ struct bic_regs * bic_regs = bic.regs ;
+ int g ;
+ for( g = 0 ; g < NR_BIC_GROUPS ; g += 1 )
+ {
+ show_bic_group(g,bic_regs->group+g) ;
+ }
+ printk(KERN_NOTICE "BIC hier_normal=%08x %08x %08x %08x\n",
+ bic_regs->hier_normal[0],
+ bic_regs->hier_normal[1],
+ bic_regs->hier_normal[2],
+ bic_regs->hier_normal[3]) ;
+ printk(KERN_NOTICE "BIC hier_critical=%08x %08x %08x %08x\n",
+ bic_regs->hier_critical[0],
+ bic_regs->hier_critical[1],
+ bic_regs->hier_critical[2],
+ bic_regs->hier_critical[3]) ;
+ printk(KERN_NOTICE "BIC hier_mcheck=%08x %08x %08x %08x\n",
+ bic_regs->hier_mcheck[0],
+ bic_regs->hier_mcheck[1],
+ bic_regs->hier_mcheck[2],
+ bic_regs->hier_mcheck[3]) ;
+
+}
diff --git a/drivers/net/bgp_torus/bgp_dma_ioctl.c b/drivers/net/bgp_torus/bgp_dma_ioctl.c
new file mode 100644
index 00000000000000..0873a360156cb4
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_ioctl.c
@@ -0,0 +1,677 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ * 'ioctl' and 'procfs' support
+ *
+ ********************************************************************/
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/bootmem.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+#include <net/tcp_hiatus.h>
+
+#include <spi/linux_kernel_spi.h>
+
+#include "bgp_dma_tcp.h"
+
+#include "bgp_bic_diagnosis.h"
+#include "../bgp_network/bgdiagnose.h"
+
+/* #define TRUST_TORUS_CRC */
+
+#define SEND_SHORT_FRAMES_INLINE
+#define ENABLE_TUNING
+
+#define ENABLE_LEARNING_ADDRESSES
+
+#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI)
+/* Select operation with linux 'dev->poll' */
+#define TORNIC_DEV_POLL
+
+/* #if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC) */
+/* #define TORNIC_STEAL_POLL_CORE */
+/* #endif */
+
+#endif
+
+#if defined(CONFIG_TCP_CONGESTION_OVERRIDES)
+extern int sysctl_tcp_force_nodelay ;
+extern int sysctl_tcp_permit_cwnd ;
+extern int sysctl_tcp_max_cwnd ;
+#endif
+
+int sysctl_bgp_torus_backlog_floor ;
+int bgp_dma_sockproto ; /* Used elsewhere to control whether we try accelerated sockets */
+
+extern int bgtornic_driverparm ; /* Parametrisation for bringup of 'tornic' device */
+
+static int proc_dodcr(struct ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos) ;
+
+static int proc_dodcr_c8b(struct ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos) ;
+
+static int proc_dodcr(struct ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+ int rc ;
+ TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+ dma_tcp_state.tuning_recfifo_threshold=mfdcrx(0xd3a) ;
+ rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+ mtdcrx(0xd3a,dma_tcp_state.tuning_recfifo_threshold) ;
+ TRACE("(<)") ;
+ return rc ;
+ }
+
+static int proc_dodcr_c8b(struct ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+ int rc ;
+ dumptorusdcrs() ;
+ TRACE("(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
+ dma_tcp_state.tuning_dcr_c8b=mfdcrx(0xc8b) ;
+ rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
+ mtdcrx(0xc8b,dma_tcp_state.tuning_dcr_c8b) ;
+ TRACE("(<)") ;
+ return rc ;
+ }
+
+
+
+static struct ctl_path bgp_torus_ctl_path[] = {
+ { .procname = "bgp", .ctl_name = 0, },
+ { .procname = "torus", .ctl_name = 0, },
+ { },
+};
+
+#define CTL_PARAM(Name,Var) \
+ { \
+ .ctl_name = CTL_UNNUMBERED, \
+ .procname = Name , \
+ .data = &dma_tcp_state.Var , \
+ .maxlen = sizeof(int), \
+ .mode = 0644, \
+ .proc_handler = &proc_dointvec \
+ }
+
+#define CTL_PARAM_DCR(Name,Var) \
+ { \
+ .ctl_name = CTL_UNNUMBERED, \
+ .procname = Name , \
+ .data = &dma_tcp_state.Var , \
+ .maxlen = sizeof(int), \
+ .mode = 0644, \
+ .proc_handler = &proc_dodcr \
+ }
+
+#define CTL_PARAM_DCR_C8B(Name,Var) \
+ { \
+ .ctl_name = CTL_UNNUMBERED, \
+ .procname = Name , \
+ .data = &dma_tcp_state.Var , \
+ .maxlen = sizeof(int), \
+ .mode = 0644, \
+ .proc_handler = &proc_dodcr_c8b \
+ }
+
+#define CTL_PARAM_HWFIFO(Name,Var) \
+ { \
+ .ctl_name = CTL_UNNUMBERED, \
+ .procname = Name , \
+ .data = &dma_tcp_state.Var , \
+ .maxlen = sizeof(int), \
+ .mode = 0644, \
+ .proc_handler = &proc_dohwfifo \
+ }
+
+struct ctl_table bgp_dma_table[] = {
+#if defined(USE_SKB_TO_SKB)
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "dma_rec_counters",
+ .data = bgp_dma_tcp_counter_copies,
+ .maxlen = DMA_NUM_COUNTERS_PER_GROUP*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_do_dma_rec_counters
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "flow_counter",
+ .data = dma_tcp_state.flow_counter,
+ .maxlen = k_flow_counters*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+#endif
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tracemask",
+ .data = &bgp_dma_tcp_tracemask,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "backlog_floor",
+ .data = &sysctl_bgp_torus_backlog_floor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sockproto",
+ .data = &bgp_dma_sockproto,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "ethem",
+ .data = &bgp_dma_ethem,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tornic_driverparm",
+ .data = &bgtornic_driverparm,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+/* { */
+/* .ctl_name = CTL_UNNUMBERED, */
+/* .procname = "tornic_count", */
+/* .data = &bgp_tornic_count, */
+/* .maxlen = sizeof(int), */
+/* .mode = 0644, */
+/* .proc_handler = &proc_dointvec */
+/* }, */
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tx_by_core",
+ .data = dma_tcp_state.tx_by_core,
+ .maxlen = 4*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tx_in_use_count",
+ .data = dma_tcp_state.tx_in_use_count,
+ .maxlen = (k_injecting_directions+1)*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+#if defined(TRACK_LIFETIME_IN_FIFO)
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "max_lifetime_by_direction",
+ .data = max_lifetime_by_direction,
+ .maxlen = (k_injecting_directions)*sizeof(unsigned long long),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+#endif
+ CTL_PARAM("bluegene_tcp_is_built",bluegene_tcp_is_built) ,
+ CTL_PARAM("count_no_skbuff",count_no_skbuff) ,
+#if defined(USE_SKB_TO_SKB)
+ CTL_PARAM("eager_limit",eager_limit) ,
+#endif
+#if defined(CONFIG_BGP_STATISTICS)
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "reception_fifo_histogram",
+ .data = reception_fifo_histogram,
+ .maxlen = 33*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "reception_fifo_histogram",
+ .data = reception_fifo_histogram,
+ .maxlen = 33*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "reception_hi_watermark",
+ .data = &reception_hi_watermark,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "rtt_histogram",
+ .data = rtt_histogram,
+ .maxlen = 33*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "transit_histogram",
+ .data = transit_histogram,
+ .maxlen = 33*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "resequence_histogram",
+ .data = dma_tcp_state.resequence_histogram,
+ .maxlen = k_concurrent_receives*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "bytes_sent",
+ .data = &dma_tcp_state.bytes_sent,
+ .maxlen = sizeof(unsigned long long),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "bytes_received",
+ .data = &dma_tcp_state.bytes_received,
+ .maxlen = sizeof(unsigned long long),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+#endif
+
+#if defined(CONFIG_TCP_HIATUS_COUNTS)
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tcp_hiatus_counts",
+ .data = tcp_hiatus_counts,
+ .maxlen = k_tcp_hiatus_reasons*sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tcp_force_nodelay",
+ .data = &sysctl_tcp_force_nodelay,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tcp_permit_cwnd",
+ .data = &sysctl_tcp_permit_cwnd,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tcp_max_cwnd",
+ .data = &sysctl_tcp_max_cwnd,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+#endif
+
+#if defined(ENABLE_TUNING)
+ CTL_PARAM("tuning_num_packets",tuning_num_packets) ,
+ CTL_PARAM("tuning_num_empty_passes",tuning_num_empty_passes) ,
+ CTL_PARAM("tuning_non_empty_poll_delay",tuning_non_empty_poll_delay) ,
+ CTL_PARAM("tuning_poll_after_enabling",tuning_poll_after_enabling) ,
+ CTL_PARAM("tuning_run_handler_on_hwi",tuning_run_handler_on_hwi) ,
+ CTL_PARAM("tuning_clearthresh_slih",tuning_clearthresh_slih) ,
+ CTL_PARAM("tuning_clearthresh_flih",tuning_clearthresh_flih) ,
+ CTL_PARAM("tuning_disable_in_dcr",tuning_disable_in_dcr) ,
+
+ CTL_PARAM("tuning_injection_hashmask",tuning_injection_hashmask) ,
+
+ CTL_PARAM_DCR("tuning_recfifo_threshold",tuning_recfifo_threshold) ,
+
+ CTL_PARAM("tuning_exploit_reversepropose",tuning_exploit_reversepropose) ,
+ CTL_PARAM("tuning_counters_per_source",tuning_counters_per_source) ,
+ CTL_PARAM("tuning_defer_skb_until_counter",tuning_defer_skb_until_counter) ,
+ CTL_PARAM("tuning_deliver_eagerly",tuning_deliver_eagerly) ,
+ CTL_PARAM("tuning_diagnose_rst",tuning_diagnose_rst) ,
+ CTL_PARAM("tuning_select_fifo_algorithm",tuning_select_fifo_algorithm) ,
+ CTL_PARAM("tuning_min_icsk_timeout",tuning_min_icsk_timeout) ,
+ CTL_PARAM("tuning_virtual_channel",tuning_virtual_channel) ,
+
+ CTL_PARAM_DCR_C8B("tuning_dcr_c8b",tuning_dcr_c8b) ,
+#endif
+#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS)
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "tcp_scattergather_frag_limit",
+ .data = &tcp_scattergather_frag_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+#endif
+
+#if defined(KEEP_TCP_FLAG_STATS)
+ CTL_PARAM("tcp_count_fin",tcp_received_flag_count[7]) ,
+ CTL_PARAM("tcp_count_syn",tcp_received_flag_count[6]) ,
+ CTL_PARAM("tcp_count_rst",tcp_received_flag_count[5]) ,
+ CTL_PARAM("tcp_count_psh",tcp_received_flag_count[4]) ,
+ CTL_PARAM("tcp_count_ack",tcp_received_flag_count[3]) ,
+ CTL_PARAM("tcp_count_urg",tcp_received_flag_count[2]) ,
+ CTL_PARAM("tcp_count_ece",tcp_received_flag_count[1]) ,
+ CTL_PARAM("tcp_count_cwr",tcp_received_flag_count[0]) ,
+#endif
+ { 0 },
+};
+
+static void __init
+register_torus_sysctl(dma_tcp_t *dma_tcp)
+{
+ dma_tcp->sysctl_table_header=register_sysctl_paths(bgp_torus_ctl_path,bgp_dma_table) ;
+ TRACEN(k_t_init, "sysctl_table_header=%p",dma_tcp->sysctl_table_header) ;
+
+}
+
/* feature for exploring all-to-all performance with a device in /dev */
/* Forward declarations of the character-device entry points defined below. */
static int bgpdmatcp_add_device(int major, int minor, const char* name);
static int bgpdmatcp_device_open(struct inode *inode, struct file *filp);
static int bgpdmatcp_device_release(struct inode *inode, struct file * filp);
static long bgpdmatcp_device_ioctl( struct file * filp,
 unsigned int cmd, unsigned long arg);
/* Statically chosen device number for /dev/bgpdmatcp. */
enum {
 k_bgpdmatcp_major = 126 , /* char major number used by bgpdmatcp_add_device() */
 k_bgpdmatcp_minor_nums = 1 /* a single minor */
} ;
+
/* Per-device state for the bgpdmatcp diagnostic device; exactly one
 * instance exists (bgpdmatcp_device below). */
struct bgpdmatcp_dev
{
 int major,minor; /* device major, minor */
 struct task_struct* current; /* process holding device */
 /* NOTE(review): 'current' is normally a kernel macro expanding to
  * get_current(); naming a struct field after it is fragile and may not
  * even preprocess correctly in files that include asm/current.h --
  * confirm this builds, or rename the field (e.g. 'owner'). */
 int signum; /* signal to send holding process */
 wait_queue_head_t read_wq; /* waiters, woken when read_complete is set */
 int read_complete; /* completion flag paired with read_wq */
 struct semaphore sem; /* interruptible semaphore */
 struct cdev cdev; /* container device? */
};


/* The single global instance backing the device node. */
static struct bgpdmatcp_dev bgpdmatcp_device;
+
+
/* File operations: only open/release and unlocked_ioctl are implemented;
 * read/write/poll/mmap are deliberately absent (NULL is also the default
 * for unset members, so the explicit NULLs are documentation only). */
static struct file_operations bgpdmatcp_device_fops =
 {
 .owner= THIS_MODULE,
 .open= bgpdmatcp_device_open,
 .read = NULL,
 .write= NULL,
 .poll= NULL,
 .unlocked_ioctl= bgpdmatcp_device_ioctl,
 .release= bgpdmatcp_device_release,
 .mmap= NULL,
 };
+
+
+static int bgpdmatcp_add_device(int major,
+ int minor,
+ const char* devname
+ )
+{
+ int ret;
+ dev_t devno;
+ struct bgpdmatcp_dev* dev = &bgpdmatcp_device;
+
+ /* initilize struct */
+ init_MUTEX (&dev->sem);
+ dev->major = major;
+ dev->minor = minor;
+ init_waitqueue_head(&dev->read_wq);
+ dev->read_complete = 0;
+ devno=MKDEV(major,minor);
+
+ /* register i.e., /proc/devices */
+ ret=register_chrdev_region(devno,1,(char *)devname);
+
+ if (ret) {
+ printk (KERN_WARNING "bgpdmatcp: couldn't register device (%d,%d) err=%d\n",
+ major,minor,ret);
+ return ret;
+ }
+
+ /* add cdev */
+ cdev_init(&dev->cdev,&bgpdmatcp_device_fops);
+ dev->cdev.owner=THIS_MODULE;
+ dev->cdev.ops=&bgpdmatcp_device_fops;
+ ret=cdev_add(&dev->cdev,devno,1);
+ if (ret) {
+ printk(KERN_WARNING "bgpdmatcp: couldn't register device (%d,%d), err=%d\n",
+ major,minor,ret);
+ return ret;
+ }
+
+ /* signul to pass to owning process, should be altered using ioctl */
+ dev->signum=-1;
+
+
+ return 0;
+}
+
+
+static int bgpdmatcp_device_open (struct inode *inode, struct file *filp)
+{
+ struct bgpdmatcp_dev *dev=container_of(inode->i_cdev,struct bgpdmatcp_dev,cdev);
+
+ if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+ up(&dev->sem);
+
+ dev->current=current;
+ filp->private_data = (void*) dev;
+
+ return 0;
+}
+
+
+
+
+
+static int bgpdmatcp_device_release (struct inode *inode, struct file * filp)
+{
+ struct bgpdmatcp_dev *dev=(struct bgpdmatcp_dev *)filp->private_data;
+
+ /*Ensure exclusive access*/
+ if (down_interruptible(&dev->sem)) return -ERESTARTSYS;
+
+ dev->current = NULL;
+ up(&dev->sem);
+
+ return 0;
+}
+
+/* Report the counts of how often a TCP write has stalled, by stall reason */
+static void bgp_dma_diag_report_hiatus_counts(int __user * report)
+{
+ copy_to_user(report,tcp_hiatus_counts,k_tcp_hiatus_reasons*sizeof(int)) ;
+}
+
+/* Report bytes read and bytes written over the torus */
+static void bgp_dma_diag_report_transfer_counts(int __user * report)
+{
+ copy_to_user(report,&dma_tcp_state.bytes_received,sizeof(unsigned long long)) ;
+ copy_to_user(report+sizeof(unsigned long long)/sizeof(int),&dma_tcp_state.bytes_sent,sizeof(unsigned long long)) ;
+}
+
+
/* ioctl command codes understood by bgpdmatcp_device_ioctl().
 * Codes 1, 3 and 5 correspond to transfer modes whose implementations are
 * currently commented out in the handler. */
enum {
 k_ioctl_activate = 0 , /* start a transfer of N bytes (int at arg) */
 k_ioctl_wait = 1 , /* disabled */
 k_ioctl_clearcount = 2 , /* reset the transfer counters */
 k_ioctl_activate_minicube = 3 , /* disabled */
 k_ioctl_wait_sync = 4 , /* wait until 'demandCount' transfers done */
 k_ioctl_activate_to_one = 5 , /* disabled */
 k_ioctl_report_tx_queue = 6 , /* copy tx-queue diagnostics to user */
 k_ioctl_report_hiatus_counts = 7 , /* copy stall counters to user */
 k_ioctl_report_bytes_transferred = 8 /* copy byte counters to user */
};
+static long bgpdmatcp_device_ioctl (
+ struct file * filp,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ TRACEN(k_t_detail, "cmd=%d arg=0x%08lx",cmd,arg) ;
+
+ switch (cmd) {
+ case k_ioctl_activate :
+ {
+ int sendBytes ;
+ if( get_user(sendBytes,(int __user *)arg) )
+ {
+ return -EFAULT ;
+ }
+ if( sendBytes <= k_injection_packet_size)
+ {
+ dma_tcp_transfer_activate_sync(sendBytes) ;
+ }
+/* else */
+/* { */
+/* dma_tcp_transfer_activate(sendBytes) ; */
+/* } */
+ }
+ break ;
+/* #if 0 */
+/* case k_ioctl_wait : */
+/* { */
+/* int demandCount ; */
+/* int rc ; */
+/* if( get_user(demandCount,(int __user *)arg) ) */
+/* { */
+/* return -EFAULT ; */
+/* } */
+/* rc = dma_tcp_transfer_wait(demandCount) ; */
+/* return rc ? 0 : (-EAGAIN) ; */
+/* } */
+/* break ; */
+/* #endif */
+ case k_ioctl_wait_sync :
+ {
+ int demandCount ;
+ int rc ;
+ if( get_user(demandCount,(int __user *)arg) )
+ {
+ return -EFAULT ;
+ }
+ rc = dma_tcp_transfer_wait_sync(demandCount) ;
+ return rc ? 0 : (-EAGAIN) ;
+ }
+ break ;
+ case k_ioctl_clearcount :
+ dma_tcp_transfer_clearcount() ;
+ break ;
+/* #if 0 */
+/* case k_ioctl_activate_minicube : */
+/* { */
+/* int sendBytes ; */
+/* if( get_user(sendBytes,(int __user *)arg) ) */
+/* { */
+/* return -EFAULT ; */
+/* } */
+/* dma_tcp_transfer_activate_minicube(sendBytes) ; */
+/* } */
+/* break ; */
+/* case k_ioctl_activate_to_one : */
+/* { */
+/* int sendBytes ; */
+/* unsigned int tg ; */
+/* if( get_user(sendBytes,(int __user *)arg) ) */
+/* { */
+/* return -EFAULT ; */
+/* } */
+/* if( get_user(tg,(int __user *)(arg+sizeof(int))) ) */
+/* { */
+/* return -EFAULT ; */
+/* } */
+/* dma_tcp_transfer_activate_to_one(sendBytes,tg) ; */
+/* } */
+/* break ; */
+/* #endif */
+ case k_ioctl_report_tx_queue :
+ bgp_dma_diag_report_transmission_queue((int __user *)arg) ;
+ break ;
+ case k_ioctl_report_hiatus_counts :
+ bgp_dma_diag_report_hiatus_counts((int __user *)arg) ;
+ break ;
+ case k_ioctl_report_bytes_transferred :
+ bgp_dma_diag_report_transfer_counts((int __user *)arg) ;
+ break ;
+ }
+ return 0;
+}
+
/* Init-time hook: create the /dev diagnostic device and register the
 * torus sysctl table. */
void __init
dma_tcp_devfs_procfs_init(dma_tcp_t * dma_tcp)
{
 /* return value ignored; bgpdmatcp_add_device() logs its own failures */
 bgpdmatcp_add_device(k_bgpdmatcp_major,0,"bgpdmatcp") ;
 register_torus_sysctl(dma_tcp) ;
}
diff --git a/drivers/net/bgp_torus/bgp_dma_memcpy.c b/drivers/net/bgp_torus/bgp_dma_memcpy.c
new file mode 100644
index 00000000000000..67d515b9057438
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_memcpy.c
@@ -0,0 +1,1321 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: copy_tofrom_user using the BGP DMA hardware
+ *
+ *
+ *
+ ********************************************************************/
+#define REQUIRES_DUMPMEM
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/pagemap.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <asm/bitops.h>
+#include <asm/div64.h>
+#include <linux/vmalloc.h>
+#include <asm/atomic.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+
+
+
+/* #include "bglink.h" */
+#include <spi/linux_kernel_spi.h>
+
+#include <asm/time.h>
+
+#include "bgp_dma_tcp.h"
+#include "bgp_bic_diagnosis.h"
+#include "../bgp_network/bgdiagnose.h"
+#include "../bgp_network/450_tlb.h"
+#include "bgp_memcpy.h"
+
/* Machine memory geometry */
enum {
 k_l1_line_size = 32 , /* L1 data cache line size, bytes (see flush_l1) */
 k_page_shift = PAGE_SHIFT ,
 k_page_size = 1 << k_page_shift ,
 k_page_offset_mask = k_page_size-1
};
/* How we are going to use the hardware */
enum {
 k_counters_per_core = 1 , /* DMA reception counters per core; must be a power of 2 (masked in acquire_counter) */
 k_spinlimit = 100000 , /* max polls while waiting for a DMA to drain (await_copy_completion) */
 k_requires_fp = 0 , /* enable_kernel_fp() before injecting, if set */
 k_my_vc_for_adaptive = k_VC_anyway
 /* k_my_vc_for_adaptive = k_VC_ordering */
};
/* What diagnostics/verification are we going to enable */
enum {
/* k_diagnose = 0 , */
 k_diag_not_mapped = 1 ,
 k_fromcheck_pre = 0 ,
 k_fromcheck_post = 0,
 k_tocheck_pre = 0,
 k_tocheck_post = 0 ,
 k_check_with_crc = 1 , /* region checks use a CRC rather than a sum (rc_revise) */
 k_flush_target_from_l1 = 0 ,
 k_verify_dma = 1, /* presumably compares source/target after the DMA -- used outside this chunk */
 k_fixup_faulty_memcpy=1, /* presumably re-does a failed copy in software -- used outside this chunk */
 k_map_write_check=0 ,
 k_disable_after_too_many_faults=1 /* fall back to software copy after repeated miscompares (report_faulty_memcpy) */
};

/* value to let the counter get to when it is idle --- we do not want '0' because that would mean an interrupt */
enum {
 k_counter_idle_value = 0x00000010
};
+
+
/* For putting an 'msync'in where we don't think we should need it, but helping initial diagnostics */
/* Currently always issues _bgp_msync(); kept as a named hook so the
 * barrier can later be compiled out at its call sites if proven unneeded. */
static inline void maybe_msync(void)
{
 _bgp_msync() ;
}
/* data cache block flush, evict the given line from L1 if it is there */
/* Two-register form: flushes the cache line containing (a0 + a1). */
static inline void dcbf(unsigned int a0,unsigned int a1)
{
 asm volatile( "dcbf %[a0],%[a1]"
 :
 : [a0] "b" (a0), [a1] "b" (a1)
 ) ;
}
/* Single-address form of dcbf: flushes the cache line containing 'a'. */
static inline void dcbf0(unsigned int a)
{
 asm volatile( "dcbf 0,%[a]"
 :
 : [a] "b" (a)
 ) ;
}
+static void flush_l1(void * address, unsigned int length)
+{
+ unsigned int address_int=(unsigned int) address ;
+ unsigned int address_end_int=address_int+length-1 ;
+ unsigned int line_start=address_int & ~(k_l1_line_size-1) ;
+ unsigned int line_end=address_end_int & ~(k_l1_line_size-1) ;
+ unsigned int line_count=(line_end-line_start)/k_l1_line_size + 1 ;
+ unsigned int x ;
+ unsigned int flush_address=line_start;
+ for(x=0;x<line_count;x+=1)
+ {
+ dcbf0(flush_address) ;
+ flush_address += k_l1_line_size ;
+ }
+}
/* Per-core bookkeeping for reception-counter allocation (one entry per
 * injecting core, intended to occupy its own L1 line). */
typedef struct {
 unsigned int count ; /* monotonically increasing; selects slots round-robin */
 atomic_t in_use[k_counters_per_core] ; /* non-zero while the slot is claimed */
 unsigned int pad_to_line_size[(k_l1_line_size-k_counters_per_core-1)/sizeof(unsigned int)] ;
 /* NOTE(review): the padding expression mixes an element count
  * (k_counters_per_core) with byte sizes -- with the current values the
  * struct is larger than one k_l1_line_size line; verify sizeof() here. */
} core_counter_allocation_t __attribute__((aligned(32)));

static core_counter_allocation_t counter_allocation[k_injecting_cores] ;
+
static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index) ;
/* Try to claim a DMA reception counter for the current core.
 * Picks the next per-core slot round-robin, then uses atomic_inc_return
 * as a try-lock: only the caller that takes in_use from 0 to 1 owns the
 * slot.  On success the hardware counter is parked at k_counter_idle_value
 * and the global counter id is returned; on contention -1 is returned.
 * NOTE(review): smp_processor_id() is only stable with preemption
 * disabled -- confirm the callers guarantee that. */
static int acquire_counter(void)
{
 unsigned int this_core=smp_processor_id();
 core_counter_allocation_t * cci = counter_allocation + this_core ;
 unsigned int prev_count = cci->count++ ;
 unsigned int counter_index = prev_count & (k_counters_per_core-1) ; /* requires power-of-2 k_counters_per_core */
 int in_use = atomic_inc_return(cci->in_use+counter_index) ;
 int rc=(1 == in_use) ? (counter_index + this_core*k_counters_per_core) : -1 ;
 dma_tcp_t * dma_tcp=&dma_tcp_state ;
 TRACEN(k_t_dmacopy,"prev_count=0x%08x counter_index=%d in_use=%d rc=%d",prev_count,counter_index,in_use,rc) ;
 if( 1 == in_use)
 {
 /* We own the counter: park it at the idle value, full address window */
 DMA_CounterSetValueBaseMaxHw(dma_tcp->memcpyRecCounterGroup.counter[rc].counter_hw_ptr,k_counter_idle_value,0,0x0fffffff) ;
 show_injection_fifo_state(dma_tcp, rc) ;
 }
 return rc ;

}
+static void release_counter(unsigned int counter)
+{
+ unsigned int counter_index=counter % k_counters_per_core ;
+ unsigned int core_index=counter / k_counters_per_core ;
+ core_counter_allocation_t * cci = counter_allocation + core_index ;
+ TRACEN(k_t_dmacopy,"counter=%d core_index=%d counter_index=%d in_use=%d",counter,core_index,counter_index,atomic_read(cci->in_use+counter_index)) ;
+ atomic_set(cci->in_use+counter_index,0) ;
+}
+
/* Permanently switch the DMA-assisted memcpy path off (global flag) and
 * record the event in the memcpy statistics. */
static void cause_fallback(void)
{
 TRACEN(k_t_request,"Turning off DMA memcpy") ;
 bgp_memcpy_control.use_dma = 0 ;
 dma_memcpy_statistic(k_copy_cause_fallback) ;
}
+
+static unsigned int find_real_address(const void * virtual_address)
+{
+ struct page *realpage = NULL ;
+ int res ;
+ /* Try to fault in all of the necessary pages */
+ down_read(&current->mm->mmap_sem);
+ res = get_user_pages(
+ current,
+ current->mm,
+ (unsigned long) virtual_address,
+ 1, /* One page */
+ 0, /* intent read */
+ 0, /* don't force */
+ &realpage,
+ NULL);
+ up_read(&current->mm->mmap_sem);
+
+ TRACEN(k_t_dmacopy,"find_real_address virtual_address=%p res=%d page=%p pfn=0x%08lx real_address=0x%016llx",
+ virtual_address,res,realpage,page_to_pfn(realpage),page_to_phys(realpage)) ;
+
+ if( 1 == res) /* Number of pages mapped, should be 1 for this call */
+ {
+ unsigned int rc = page_to_phys(realpage) ;
+ put_page(realpage) ;
+ return rc ;
+ }
+ return 0 ;
+
+}
+
/* Diagnostic virtual->real translation: walks the 44x TLB directly via
 * search_tlb_v() and traces what it finds.  Returns the real address, or
 * 0xffffffff when no stable, valid translation exists.  The search is done
 * twice and must return the same index, guarding against the TLB changing
 * underneath us (e.g. replacement from an interrupt handler). */
static unsigned int v_to_r_maybe_show(const void * vaddr)
{
 unsigned int vaddr_int=(unsigned int)vaddr ;
 int tlbx=search_tlb_v(vaddr_int) ;
 int pageid=get_tlb_pageid(tlbx) ;
 int xlat=get_tlb_xlat(tlbx) ;
 int attrib=get_tlb_attrib(tlbx) ;
 int tlbx1=search_tlb_v((unsigned int)vaddr) ;
 if( (tlbx == tlbx1) /* Translation didn't change under me due to e.g. interrupt */
 && ((pageid & TLB0_V) != 0) /* TLB is valid */
 && ((tlbx & 0x20000000) != 0) /* search_tlb_v sets this bit if it found a translation */
 )
 {
 unsigned int epn = TLB0_EPN_1K(pageid) ; // virtual page for the TLB
 unsigned int rpn = TLB1_RPN_1K(xlat) ; // real page for the TLB
 unsigned int result = (vaddr_int-epn) + rpn ;
 TRACEN(k_t_request,"vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x",
 vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ;
 return result ;

 }
 else
 {
 TRACEN(k_t_request,"vaddr=%p tlbx=0x%08x pageid=0x%08x tlbx1=0x%08x unmapped",
 vaddr,tlbx,pageid,tlbx1) ;
 /* Repeat the whole lookup purely so the retry can be traced, then
  * report the address as unmapped. */
 tlbx=search_tlb_v(vaddr_int) ;
 pageid=get_tlb_pageid(tlbx) ;
 xlat=get_tlb_xlat(tlbx) ;
 attrib=get_tlb_attrib(tlbx) ;
 tlbx1=search_tlb_v((unsigned int)vaddr) ;
 {
 unsigned int epn = TLB0_EPN_1K(pageid) ; // virtual page for the TLB
 unsigned int rpn = TLB1_RPN_1K(xlat) ; // real page for the TLB
 unsigned int result = (vaddr_int-epn) + rpn ;
 TRACEN(k_t_request,"retry vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x",
 vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ;
 }

 return (unsigned int) -1 ; // Not mapped
 }
}
+
/* Translate a user virtual address to a real address for DMA (read side).
 * The TLB is read repeatedly until two consecutive lookups agree, since an
 * interrupt can replace entries mid-lookup; persistent disagreement is
 * counted and reported as 0xffffffff.  If the address is simply unmapped,
 * the page is touched with get_user() to fault it in and the double-lookup
 * is repeated once. */
static unsigned int v_to_r(const void * vaddr, tlb_t *t)
{
 unsigned int rc=v_to_r_maybe(vaddr,t) ;
 unsigned int rc2=v_to_r_maybe(vaddr,t) ;
 if( rc != rc2)
 {
 /* First disagreement: take the newer answer and re-check once */
 dma_memcpy_statistic(k_copy_inconsistent_tlb_1_info) ;
 rc=rc2 ;
 rc2=v_to_r_maybe(vaddr,t) ;
 }
 if( rc != rc2)
 {

 dma_memcpy_statistic(k_copy_inconsistent_tlb_1_rejects) ;
 TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_1",vaddr,rc,rc2) ;
 return 0xffffffff ;
 }
 if( 0xffffffff == rc) // Not mapped, touch the address and see what happens
 {
 unsigned int pageInt ;
 int getrc = get_user(pageInt,(unsigned int __user *)vaddr ) ;
 _bgp_msync() ;
 if( getrc )
 {
 TRACEN(k_t_general,"Unmapped : %p",vaddr) ;
 rc =(unsigned int) -1 ; // Not mapped
 }
 else
 {
 rc=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely
 rc2=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely
 if( rc != rc2)
 {
 dma_memcpy_statistic(k_copy_inconsistent_tlb_2_info) ;
 rc=rc2 ;
 rc2=v_to_r_maybe(vaddr,t) ;
 }
 if( rc != rc2)
 {
 dma_memcpy_statistic(k_copy_inconsistent_tlb_2_rejects) ;
 TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_2",vaddr,rc,rc2) ;
 return 0xffffffff ;
 }
 dma_memcpy_statistic(k_copy_tlb_touches) ;
 }
 }
 return rc ;
}
+static unsigned int v_to_r_write(const void * vaddr, tlb_t *t)
+{
+ unsigned int rc=v_to_r_maybe(vaddr,t) ;
+ unsigned int rc2=v_to_r_maybe(vaddr,t) ;
+ if( rc != rc2)
+ {
+ dma_memcpy_statistic(k_copy_inconsistent_tlb_1_info) ;
+ rc=rc2 ;
+ rc2=v_to_r_maybe(vaddr,t) ;
+ }
+ if( rc != rc2)
+ {
+
+ dma_memcpy_statistic(k_copy_inconsistent_tlb_1_rejects) ;
+ TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_1",vaddr,rc,rc2) ;
+ return 0xffffffff ;
+ }
+ if( 0xffffffff == rc) // Not mapped, touch the address and see what happens
+ {
+ unsigned int pageInt =0;
+ int putrc = get_user(pageInt,(unsigned int __user *)vaddr ) ;
+ _bgp_msync() ;
+ if( putrc )
+ {
+ TRACEN(k_t_general,"Unmapped : %p",vaddr) ;
+ rc =(unsigned int) -1 ; // Not mapped
+ }
+ else
+ {
+ rc=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely
+ rc2=v_to_r_maybe(vaddr,t) ; // Try the lookup again; could miss (if we get an interrupt) but not likely
+ if( rc != rc2)
+ {
+ dma_memcpy_statistic(k_copy_inconsistent_tlb_2_info) ;
+ rc=rc2 ;
+ rc2=v_to_r_maybe(vaddr,t) ;
+ }
+ if( rc != rc2)
+ {
+ dma_memcpy_statistic(k_copy_inconsistent_tlb_2_rejects) ;
+ TRACEN(k_t_request,"vaddr=%p rc=0x%08x rc2=0x%08x tlb_2",vaddr,rc,rc2) ;
+ return 0xffffffff ;
+ }
+ dma_memcpy_statistic(k_copy_tlb_touches) ;
+ }
+ }
+ return rc ;
+}
/* Build a node-local direct-put DMA descriptor in *desc that moves 'msglen'
 * bytes from real address 'dataAddr' to reception offset 'offset', using
 * the memcpy injection/reception counter groups.  A zero-length request is
 * traced as an error but the descriptor is still constructed. */
static inline void create_dma_descriptor_memcpy(dma_tcp_t *dma_tcp,
 int injection_counter,
 int reception_counter,
 dma_addr_t dataAddr,
 int msglen,
 unsigned int offset,
 DMA_InjDescriptor_t *desc
 )
{
 int ret1 __attribute((unused));
 TRACEN(k_t_dmacopy , "(>) memcpying injection_counter=%d reception_counter=%d dataAddr=0x%08llx msglen=0x%08x offset=0x%08x desc=%p",injection_counter,reception_counter,dataAddr,msglen,offset,desc);
 if( 0 == msglen)
 {
 TRACEN(k_t_error , "(E) zero length memcpying injection_counter=%d reception_counter=%d dataAddr=0x%08llx msglen=0x%08x offset=0x%08x desc=%p",injection_counter,reception_counter,dataAddr,msglen,offset,desc);
 }
 ret1 = DMA_LocalDirectPutDescriptor( desc,
 k_InjectionCounterGroup, /* inj cntr group id */
 injection_counter, /* inj counter id */
 dataAddr, /* send offset */
 k_ReceptionCounterGroupMemcpy, /* rec ctr grp */
 reception_counter,
 offset, /* reception offset */
 msglen /* message length */
 );

 TRACEN(k_t_dmacopy , "(<) ret1=%d",ret1);

}
+
+static void diagnose_injection_fifo(DMA_InjFifo_t *f_ptr)
+{
+ int free_space_0 = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+ 0, /* Use shadow head */
+ 0);/* use shadow tail */
+ int free_space_1 = DMA_FifoGetFreeSpace( &f_ptr->dma_fifo,
+ 1, /* Use hardware head */
+ 0);/* use shadow tail */
+ TRACEN(k_t_request,"free_space_0=0x%08x free_space_1=0x%08x",free_space_0,free_space_1) ;
+}
+
+static void diagnose_injection_fifo_by_id(
+ DMA_InjFifoGroup_t *fg_ptr,
+ int fifo_id
+ )
+{
+ diagnose_injection_fifo(&fg_ptr->fifos[fifo_id]) ;
+}
+
/* Push a prepared descriptor into the memcpy injection fifo selected by
 * 'desired_fifo'.  DMA_InjFifoInjectDescriptorById() returns the number of
 * descriptors injected; anything other than 1 is traced and the fifo is
 * diagnosed.
 * NOTE(review): the function always returns 1, even when the injection
 * failed -- callers cannot detect the failure.  Consider returning 'ret'. */
static inline int inject_dma_descriptor_memcpy(dma_tcp_t *dma_tcp,
 unsigned int desired_fifo,
 DMA_InjDescriptor_t *desc)
 {
 int ret __attribute__((unused));
 TRACEN(k_t_dmacopy , "(>) injecting desired_fifo=%d desc=%p",desired_fifo,desc);
 maybe_msync() ;
 ret = DMA_InjFifoInjectDescriptorById( &dma_tcp->memcpyInjFifoGroupFrames,
 dma_tcp->memcpyInjFifoFramesIds[desired_fifo],
 desc );
 maybe_msync() ;
 if(ret != 1 )
 {
 TRACEN(k_t_error,"(!!!) ret=%d",ret) ;
 diagnose_injection_fifo_by_id(
 &dma_tcp->memcpyInjFifoGroupFrames,
 dma_tcp->memcpyInjFifoFramesIds[desired_fifo]
 ) ;

 }

 TRACEN(k_t_general , "(<) ret=%d",ret);
 return 1 ;

 }
static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index) ;
/* NOTE(review): duplicate forward declaration (also declared earlier); harmless. */
static int instrument_copy_user_address_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,const void * partner_vaddr,copy_op_t *c) ;

/* Cursor over an in-progress copy; dma_copy_partial() advances it one
 * page-bounded fragment at a time. */
typedef struct {
 void * address ;
 const void * partner_address ;
 unsigned int size ;
} memcpy_control;
+
/* Copy the largest prefix of the remaining range that stays within one
 * page on BOTH sides, advance the cursor past it, and delegate the actual
 * work to instrument_copy_user_address_within_page().  Returns that
 * function's status (non-zero stops dma_copy_full()'s loop). */
static unsigned int dma_copy_partial(dma_tcp_t * dma_tcp,unsigned int counter_index, memcpy_control * mc,copy_op_t *c)
{
 void * address = mc->address ;
 const void * partner_address = mc->partner_address ;
 unsigned int size = mc->size ;
 unsigned int address_int = (unsigned int) address ;
 unsigned int partner_address_int = (unsigned int ) partner_address ;

 /* Bytes left in the current page on each side; the fragment is the minimum */
 unsigned int address_offset=address_int & k_page_offset_mask ;
 unsigned int partner_address_offset=partner_address_int & k_page_offset_mask ;
 unsigned int lim_address=min(size,k_page_size-address_offset) ;
 unsigned int lim_partner_address=min(size,k_page_size-partner_address_offset) ;
 unsigned int lim_size=min(lim_address,lim_partner_address) ;
 if( k_diagnose) c->frag_index += 1;

 TRACEN(k_t_dmacopy,"address=%p partner_address=%p size=0x%08x lim_size=0x%05x",
 address,partner_address,size,lim_size) ;

 /* Advance the cursor past this fragment */
 mc->address = address+lim_size ;
 mc->partner_address = partner_address+lim_size ;
 mc->size = size-lim_size ;

 return instrument_copy_user_address_within_page(dma_tcp,counter_index,address,lim_size,partner_address,c) ;
}
+
+/* return 0 iff the range described fits within one page */
+static int crosses_page_boundary(const void * address, unsigned int size)
+{
+ unsigned int a=(unsigned int) address ;
+ unsigned int ae = a+size-1 ;
+ return (ae >> k_page_shift ) - (a >> k_page_shift) ;
+}
+static unsigned int dma_copy_full_singlepage(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address,const void * partner_address,unsigned int size,copy_op_t *c)
+{
+ unsigned int rc ;
+ TRACEN(k_t_dmacopy,"(>) address=%p partner_address=%p size=0x%08x",
+ address,partner_address,size) ;
+ rc=instrument_copy_user_address_within_page(dma_tcp,counter_index,address,size,partner_address,c) ;
+ TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+ return rc ;
+}
+static unsigned int dma_copy_full(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address,const void * partner_address,unsigned int size,copy_op_t *c)
+{
+ unsigned int rc=0 ;
+ memcpy_control mc ;
+ TRACEN(k_t_dmacopy,"(>) address=%p partner_address=%p size=0x%08x",
+ address,partner_address,size) ;
+ mc.address=address ;
+ mc.partner_address=partner_address ;
+ mc.size=size ;
+ while(mc.size != 0 && rc == 0)
+ {
+ rc |= dma_copy_partial(dma_tcp,counter_index,&mc,c) ;
+ }
+ TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+ return rc ;
+}
/* Issue the DMA for one page-bounded fragment: whole torus-payload frames
 * first, then one shorter trailing frame for the remainder.  Returns the
 * number of descriptors injected (0, 1 or 2). */
static unsigned int dma_copy_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index, unsigned int real_address, unsigned int partner_real_address, unsigned int size,copy_op_t *c)
{
 unsigned int full_frame_count=size / k_torus_link_payload_size ;
 unsigned int full_frame_size = full_frame_count * k_torus_link_payload_size ;
 unsigned int trailing_frame_size = size - full_frame_size ;
 unsigned int rc=0 ;

 DMA_InjDescriptor_t desc ;
 TRACEN(k_t_dmacopy,"(>) counter_index=%d real_address=0x%08x partner_real_address=0x%08x size=0x%05x full_frame_count=%d full_frame_size=0x%08x trailing_frame_size=0x%08x",
 counter_index,real_address,partner_real_address,size,full_frame_count,full_frame_size,trailing_frame_size) ;
 if( k_requires_fp)
 {
 enable_kernel_fp() ;
 }
 if( full_frame_size > 0 )
 {
 /* One descriptor covers all the full frames */
 create_dma_descriptor_memcpy(dma_tcp,0,counter_index,partner_real_address,full_frame_size,real_address,&desc) ;
 inject_dma_descriptor_memcpy(dma_tcp,counter_index,&desc) ;
 rc = 1 ;
 }
 if( trailing_frame_size > 0 )
 {
 /* Second descriptor for the sub-frame remainder */
 show_injection_fifo_state(dma_tcp,counter_index) ;
 create_dma_descriptor_memcpy(dma_tcp,0,counter_index,partner_real_address+full_frame_size,trailing_frame_size,real_address + full_frame_size,&desc) ;
 inject_dma_descriptor_memcpy(dma_tcp,counter_index,&desc) ;
 rc+=1 ;
 }
 return rc ;
}
+
/* Busy-wait for approximately 'idlecount' no-op instructions. */
static void spin_idle(unsigned int idlecount)
{
	unsigned int remaining = idlecount;

	while (remaining != 0) {
		__asm__ __volatile__("nop;");
		remaining -= 1;
	}
}
+
/* Engage in least-squares regression to estimate data rates */
dma_statistic_t bgp_dma_rate ;
/* Fold one (x, y) sample into the running regression sums
 * (s1, sx, sxx, sy, sxy) and occasionally trace the fitted line as the
 * numerators m0 (slope) and m1 (intercept) over the determinant.
 * NOTE(review): do_div() treats its operands as unsigned, so negative m0/m1
 * produce meaningless q0/q1 -- diagnostics only.  Also x*x is computed in
 * int before widening; large x can overflow -- confirm the sample range. */
static void rate_observe(dma_statistic_t * st,int x,int y)
{
 int s1 = st->s1 + 1;
 int sx = st->sx + x;
 long long int sxx = st->sxx + x*x ;
 int sy = st->sy + y ;
 long long int sxy = st->sxy + x*y ;


 st->s1 = s1 ;
 st->sx = sx ;
 st->sxx = sxx ;
 st->sy = sy ;
 st->sxy = sxy ;

 if( ((s1 >> 1) & 0xff ) <= bgp_memcpy_control.rate_observe_report_count ) /* Sample a few */
 {
 /* Cramer's rule: slope = m0/det, intercept = m1/det */
 long long det=s1*sxx-((long long)sx)*sx ;
 long long m0 = s1*sxy - ((long long)sx)*sy ;
 long long m1 = sxx*sy -sx*sxy ;
 unsigned long long q0 = m0 ;
 unsigned long long q1 = m1 ;
 unsigned int uidet = det ;
 if( uidet != 0)
 {
 do_div(q0,uidet) ;
 do_div(q1,uidet) ;
 }
 else
 {
 q0 = 0 ;
 q1 = 0 ;
 }

 TRACEN(k_t_request,"x=%d y=%d s1=%d sx=%d sxx=%lld sy=%d sxy=%lld det=%lld m0=%lld m1=%lld q0=%lld q1=%lld",
 x,y,s1,sx,sxx,sy,sxy,det,m0,m1,q0,q1) ;
 }

}
/* Wait for an in-flight DMA memcpy on 'counter_index' to finish.
 * First spins for an estimated number of cycles (cycles_per_packet scaled
 * by size), feeding the observed progress into the rate estimator, then
 * polls the reception counter down to k_counter_idle_value with an upper
 * bound of k_spinlimit polls.  Finally disables the counter and checks
 * that the injection fifo has drained.  Returns 0 on clean completion,
 * 1 if the fifo or counter still shows outstanding work. */
static int await_copy_completion(dma_tcp_t * dma_tcp,unsigned int counter_index, unsigned int size )
{
 int rc=0 ;
 unsigned int fifo_current_head =
 (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
 unsigned int fifo_initial_head = fifo_current_head ;
 unsigned int fifo_tail =
 (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
 unsigned int spincount = 0 ;
 unsigned int initial_rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ;
 /* Estimated busy-wait before first poll; 256 is the frame granularity used for the estimate */
 unsigned int idlecount=bgp_memcpy_control.cycles_per_packet*size/256 ;
 TRACEN(k_t_dmacopy,"(>) counter_index=%d size=0x%08x fifo_current_head=0x%08x fifo_tail=0x%08x initial_rec_counter_val=%d idlecount=%d",
 counter_index,size,fifo_current_head,fifo_tail,initial_rec_counter_val,idlecount) ;
 show_injection_fifo_state(dma_tcp,counter_index) ;
 spin_idle(idlecount) ;
 maybe_msync() ;
 {
 int rec_counter_after_idle=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ;
 int rec_counter_val = rec_counter_after_idle ;
 if( rec_counter_after_idle > 0)
 {
 /* Record (cycles waited, bytes progressed) for the rate estimator */
 rate_observe(&bgp_dma_rate, 0,0) ;
 rate_observe(&bgp_dma_rate, idlecount,initial_rec_counter_val-rec_counter_after_idle) ;
 }
/* while(fifo_current_head != fifo_tail && spincount < k_spinlimit ) */
/* { */
/* fifo_current_head = */
/* (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; */
/* // fifo_current_tail = */
/* // (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ; */
/* spincount += 1 ; */
/* } */
 /* Poll the reception counter until it reaches its idle park value */
 while( rec_counter_val > k_counter_idle_value && spincount < k_spinlimit )
 {
 maybe_msync() ;
 rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ;
 spincount += 1 ;
 }
 maybe_msync() ;
 DMA_CounterSetDisableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ;
 fifo_current_head =
 (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
 {
/* unsigned int rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ; */
 dma_memcpy_statistic((0==spincount) ? k_copy_await_idle_zero : ((1==spincount) ? k_copy_await_idle_high : k_copy_await_idle_low)) ;
 TRACEN(k_t_dmacopy,
 "size=0x%08x fifo_initial_head=0x%08x fifo_current_head=0x%08x fifo_tail=0x%08x initial_rec=%d after_idle=%d rec=%d spincount=%d idlecount=%d",
 size,fifo_initial_head,fifo_current_head,fifo_tail,initial_rec_counter_val,rec_counter_after_idle,rec_counter_val,spincount,idlecount) ;
 if( fifo_current_head != fifo_tail || rec_counter_val != k_counter_idle_value)
 {
 rc=1 ;
 TRACEN(k_t_error,"(E) fifo_current_head=0x%08x fifo_tail=0x%08x spincount=%d rec_counter_val=%d",
 fifo_current_head,fifo_tail,spincount,rec_counter_val) ;
 }
 TRACEN(k_t_dmacopy,"(<) rc=%d fifo_current_head=0x%08x fifo_tail=0x%08x spincount=%d rec_counter_val=%d",rc,fifo_current_head,fifo_tail,spincount,rec_counter_val) ;
 }
 }
 return rc ;
}
+
+static void show_injection_fifo_state(dma_tcp_t * dma_tcp,unsigned int counter_index)
+{
+ unsigned int fifo_current_head =
+ (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+ unsigned int fifo_current_tail =
+ (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+ unsigned int rec_counter_val=DMA_CounterGetValue(dma_tcp->memcpyRecCounterGroup.counter+counter_index) ;
+ unsigned int rec_counter_base=DMA_CounterGetBaseHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr) ;
+ unsigned int rec_counter_max=DMA_CounterGetMaxHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr) ;
+ unsigned int enabled=DMA_CounterGetEnabled(&dma_tcp->memcpyRecCounterGroup,0) ;
+ TRACEN(k_t_dmacopy,"counter_index=%d fifo_current_head=0x%08x fifo_current_tail=0x%08x rec_counter_val=0x%08x base=0x%08x max=0x%08x enabled=0x%08x",
+ counter_index,fifo_current_head,fifo_current_tail,rec_counter_val,rec_counter_base,rec_counter_max,enabled) ;
+
+}
+
/*
 * Step a 32-bit PRBS/CRC shift register by one bit, using polynomial
 * 0x04C11DB7 (CRC-32-IEEE 802.3).
 *
 * Fix: the original left-shifted a signed (possibly negative) value,
 * which is undefined behaviour in C; the arithmetic is now done on
 * unsigned, bit-identical on two's-complement targets.
 */
static inline int next_prbs(int seed)
{
	unsigned int u = (unsigned int) seed;
	/* 0xffffffff when the register's top bit is set, else 0 */
	unsigned int ncmask = (u & 0x80000000u) ? 0xffffffffu : 0u;
	return (int)((u << 1) ^ (0x04C11DB7u & ncmask));
}
+
+static inline unsigned int rc_revise(unsigned int acc, unsigned int word)
+{
+ /* Fold 'word' into the running check value 'acc'. The compile-time
+  * switch k_check_with_crc selects a CRC-style fold (LFSR step then XOR)
+  * or a plain running sum. */
+ return k_check_with_crc ? (next_prbs(acc) ^ word) : (acc + word) ;
+}
+static unsigned int region_check_int(const unsigned int * ai, unsigned int intcount)
+{
+ /* Accumulate a check value over 'intcount' consecutive words at 'ai'. */
+ unsigned int acc = 0 ;
+ unsigned int idx ;
+ for(idx=0 ; idx<intcount ; ++idx)
+ {
+ acc = rc_revise(acc, ai[idx]) ;
+ }
+ return acc ;
+}
+/* Checksum an arbitrary byte region: whole words via region_check_int,
+ * then the 1..3 trailing bytes folded in as a single value.
+ * Fix: the middle byte of the tail was taken from ac[size-3] twice; the
+ * 8-bit field must come from ac[size-2] (the individual tail-byte checks
+ * in verify_memcpy/report_faulty_memcpy confirm size-2 is the intended
+ * middle byte).
+ * NOTE(review): for size < 3 the ac[size-3] access would read before the
+ * region - callers only checksum copies at/above the DMA threshold;
+ * confirm that threshold is always >= 3. */
+static unsigned int region_check(const void * addr, unsigned int size)
+{
+ const unsigned int * ai = (const unsigned int *) addr ;
+ unsigned int intcount = size/sizeof(int) ;
+ unsigned int tailcount = size % sizeof(int) ;
+ unsigned int rc = region_check_int(ai,intcount) ;
+ if(tailcount )
+ {
+ const unsigned char * ac = (const unsigned char *) addr ;
+ unsigned int tail = (ac[size-3] << 16) | (ac[size-2] << 8) | ac[size-1] ;
+ rc=rc_revise(rc,tail) ;
+ }
+ return rc ;
+
+}
+/* Called when verify_memcpy found a miscompare. Records the post-copy
+ * checksum of the destination, optionally disables DMA acceleration after
+ * too many faults, logs up to 10 miscompared words (optionally patching the
+ * destination from the source), individually re-checks the last three bytes
+ * (the possible non-word tail), then hands the copy_op record to
+ * diagnose_faulty_copy. */
+static void report_faulty_memcpy(void * dest, const void * src, unsigned long size,copy_op_t *c)
+{
+ unsigned int * di = (unsigned int *) dest ;
+ const unsigned int * si = (const unsigned int *) src ;
+ unsigned char * dc = (unsigned char *) (dest) ;
+ const unsigned char * sc = (const unsigned char *) (src) ;
+ unsigned int x ;
+ unsigned int faultwordcount = 0 ;
+ unsigned int zsourcecount = 0 ;
+ v_to_r_maybe_show(dest) ;
+ v_to_r_maybe_show(src) ;
+ c->to_check_post=region_check(dest,size) ;
+ /* Count down towards disabling the accelerated path entirely if it
+  * keeps producing faulty copies. */
+ if( k_disable_after_too_many_faults)
+ {
+ int faults_to_go=bgp_memcpy_control.faults_until_disable-1 ;
+ if( faults_to_go <= 0 )
+ {
+ cause_fallback() ;
+ }
+ else
+ {
+ bgp_memcpy_control.faults_until_disable=faults_to_go ;
+ }
+ }
+ dma_memcpy_statistic(k_copy_verify_miscompares) ;
+ TRACEN(k_t_error,"dest=%p src=%p size=0x%08lx",dest,src,size) ;
+ /* Word-by-word sweep: log the first 10 mismatches, count them all, and
+  * count zero source words (a clue to the failure mode). */
+ for(x=0;x<size/sizeof(unsigned int);x+=1)
+ {
+ unsigned int sx = si[x] ;
+ unsigned int dx = di[x] ;
+ zsourcecount += (0 == sx) ;
+ if( dx != sx )
+ {
+ if( faultwordcount < 10 )
+ {
+ TRACEN(k_t_error,"(E) x=0x%08x di+x=%p si+x=%p di[x]=0x%08x si[x]=0x%08x",
+ x,di+x,si+x,dx,sx) ;
+ }
+ if( k_fixup_faulty_memcpy) di[x]=sx ;
+ faultwordcount += 1 ;
+ }
+ }
+ /* The last three bytes cover any tail shorter than a word; for
+  * word-multiple sizes they were already compared above. */
+ if( dc[size-3] != sc[size-3])
+ {
+ TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+ size-3,dc+size-3,sc+size-3,dc[size-3],sc[size-3]) ;
+ if( k_fixup_faulty_memcpy) dc[size-3]=sc[size-3] ;
+ }
+ if( dc[size-2] != sc[size-2])
+ {
+ TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+ size-2,dc+size-2,sc+size-2,dc[size-2],sc[size-2]) ;
+ if( k_fixup_faulty_memcpy) dc[size-2]=sc[size-2] ;
+ }
+ if( dc[size-1] != sc[size-1])
+ {
+ TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+ size-1,dc+size-1,sc+size-1,dc[size-1],sc[size-1]) ;
+ if( k_fixup_faulty_memcpy) dc[size-1]=sc[size-1] ;
+ }
+ TRACEN(k_t_error,"%d/%ld words incorrectly copied, %d sourcewords were zero",faultwordcount,size/sizeof(unsigned int),zsourcecount) ;
+ v_to_r_maybe_show(dest) ;
+ v_to_r_maybe_show(src) ;
+ show_stack(NULL,0) ;
+ c->from_check_post=region_check(src,size) ;
+ diagnose_faulty_copy(c) ;
+}
+/* Check that a 'memcpy' was accurately done: OR-accumulate the XOR of every
+ * whole word plus the last three bytes individually (covering any
+ * non-word-multiple tail) into q. q == 0 means the copy verified clean;
+ * otherwise report_faulty_memcpy logs, optionally patches, and diagnoses.
+ * Returns 0 on success, non-zero on miscompare.
+ * NOTE(review): for size < 4 the dc[size-3] accesses would read before the
+ * buffer - callers only verify copies at/above the DMA size threshold;
+ * confirm that threshold is always >= 4. */
+static int verify_memcpy(void * dest, const void * src, unsigned long size,copy_op_t *c)
+{
+ unsigned int * di = (unsigned int *) dest ;
+ const unsigned int * si = (const unsigned int *) src ;
+ unsigned char * dc = (unsigned char *) (dest) ;
+ const unsigned char * sc = (const unsigned char *) (src) ;
+ unsigned int q = di[0] ^ si[0] ; /* word 0 seeds the accumulator */
+ unsigned int x ;
+ dma_memcpy_statistic(k_copy_verify_attempts) ;
+ TRACEN(k_t_dmacopy,"dest=%p src=%p size=0x%08lx di[0]=0x%08x si[0]=0x%08x",dest,src,size,di[0],si[0]) ;
+ /* Starts at 1 because word 0 is already in q; the pre-increments walk
+  * di/si over words 1..n-1. */
+ for(x=1;x<size/sizeof(unsigned int);x+=1)
+ {
+ q |= *(++di) ^ *(++si) ;
+ }
+ q |= (dc[size-3] ^ sc[size-3]) |(dc[size-2] ^ sc[size-2]) |(dc[size-1] ^ sc[size-1]) ;
+ if(q) report_faulty_memcpy(dest,src,size,c) ;
+ return q ;
+}
+
+/* Attempt a DMA copy of 'size' bytes where both source ('address') and
+ * destination ('partner_vaddr') must lie within single pages. Translates
+ * both virtual addresses to real addresses and, if both translate, starts
+ * dma_copy_within_page. Returns 0 if the DMA was started, 1 if it could
+ * not be (untranslatable address, or a page-boundary crossing). */
+static int instrument_copy_user_address_within_page(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,const void * partner_vaddr,copy_op_t *c)
+{
+ unsigned int addr_int =(unsigned int) address ;
+ unsigned int start_page=(addr_int >> k_page_shift) ;
+ unsigned int end_page=((addr_int+size-1) >> k_page_shift) ;
+ unsigned int partner_address=(unsigned int) partner_vaddr ;
+ unsigned int partner_start_page=(partner_address >> k_page_shift) ;
+ unsigned int partner_end_page=((partner_address+size-1) >> k_page_shift) ;
+ TRACEN(k_t_dmacopy,"counter_index=%d address=%p size=0x%08lx partner_vaddr=%p start_page=0x%08x end_page=0x%08x partner_start_page=0x%08x partner_end_page=0x%08x",
+ counter_index,address,size,partner_vaddr,start_page,end_page,partner_start_page,partner_end_page) ;
+ maybe_msync() ;
+ if( end_page == start_page && partner_end_page == partner_start_page)
+ {
+ /* Translate both addresses two ways (TLB lookup and table walk) so a
+  * disagreement can be diagnosed from the trace. */
+ unsigned int real_address=v_to_r( address,&c->a_tlb) ;
+ unsigned int real_address_tablewalk=find_real_address(address) ;
+ unsigned int partner_real_address=v_to_r_write(partner_vaddr,&c->b_tlb) ;
+ unsigned int partner_real_address_tablewalk=find_real_address(partner_vaddr) ;
+ TRACEN(k_t_dmacopy,"address=%p real_address=0x%08x r_a_tablewalk=0x%08x partner_vaddr=%p partner_real_address=0x%08x p_r_a_tablewalk=0x%08x",address,real_address,real_address_tablewalk,partner_vaddr,partner_real_address,partner_real_address_tablewalk) ;
+ if( k_diagnose)
+ {
+ c->a_raddress=real_address ;
+ c->b_raddress=partner_real_address ;
+ }
+ /* 0xffffffff from v_to_r/v_to_r_write means 'could not translate' */
+ if( 0xffffffff != real_address && 0xffffffff != partner_real_address)
+ {
+ unsigned int injection_count ;
+ TRACEN(k_t_dmacopy,"address=%p real_address=0x%08x r_a_tablewalk=0x%08x partner_vaddr=%p partner_real_address=0x%08x p_r_a_tablewalk=0x%08x",address,real_address,real_address_tablewalk,partner_vaddr,partner_real_address,partner_real_address_tablewalk) ;
+ if( k_flush_target_from_l1)
+ {
+ flush_l1(address,size) ;
+ }
+ /* NOTE(review): injection_count is assigned but never used. */
+ injection_count=dma_copy_within_page(dma_tcp,counter_index,real_address,partner_real_address,size,c) ;
+ return 0 ;
+
+ }
+ if( 0xffffffff == real_address ) dma_memcpy_statistic(k_copy_source_tlb_rejects) ;
+ if( 0xffffffff == partner_real_address ) dma_memcpy_statistic(k_copy_target_tlb_rejects) ;
+ return 1 ;
+ }
+ dma_memcpy_statistic(k_copy_spanpage_rejects) ;
+ return 1 ; // At least one of the addresses wasn't mapped, or things spanned a page boundary
+
+}
+
+/* Run a full (possibly page-crossing) DMA copy, wait for it to complete,
+ * and - when verification is compiled in and enabled - check the result
+ * word for word. Returns 0 on a clean copy, non-zero otherwise. */
+static int instrument_copy_user_address(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,dma_addr_t partner_addr, const void * partner_vaddr,copy_op_t *c)
+{
+ int rc = dma_copy_full(dma_tcp,counter_index,address, partner_vaddr,size,c) ;
+ if( 0 == rc)
+ {
+ rc = await_copy_completion(dma_tcp,counter_index,size) ;
+ }
+ if( 0 == rc && k_verify_dma && bgp_memcpy_control.verify_dma)
+ {
+ rc = verify_memcpy(address, partner_vaddr, size,c) ;
+ if(rc)
+ {
+ TRACEN(k_t_error,"trapped") ;
+ }
+ }
+ return rc ;
+}
+/* Single-page variant of instrument_copy_user_address: run the copy via
+ * dma_copy_full_singlepage, wait for completion, then optionally verify.
+ * Returns 0 on a clean copy, non-zero otherwise. */
+static int instrument_copy_user_address_singlepage(dma_tcp_t * dma_tcp,unsigned int counter_index,void * address, unsigned long size,dma_addr_t partner_addr, const void * partner_vaddr,copy_op_t *c)
+{
+ int rc = dma_copy_full_singlepage(dma_tcp,counter_index, address, partner_vaddr,size,c) ;
+ if( 0 == rc)
+ {
+ rc = await_copy_completion(dma_tcp,counter_index,size) ;
+ }
+ if( 0 == rc && k_verify_dma && bgp_memcpy_control.verify_dma)
+ {
+ rc = verify_memcpy(address, partner_vaddr, size,c) ;
+ if(rc)
+ {
+ TRACEN(k_t_error,"trapped") ;
+ }
+ }
+ return rc ;
+}
+/* Prime the DMA hardware for one (possibly page-crossing) copy of 'size'
+ * bytes and run it. The ordering here matters: the reception counter is
+ * loaded before it is enabled, and _bgp_msync() fences before injection.
+ * NOTE(review): fromAddr from dma_map_single has no matching
+ * dma_unmap_single visible - confirm whether unmap is required on this
+ * platform. */
+static int instrument_copy_user(void * to, const void * from, unsigned long size,unsigned int counter_index,copy_op_t *c)
+{
+ dma_tcp_t * dma_tcp=&dma_tcp_state ;
+ dma_addr_t fromAddr = dma_map_single(NULL, (void *)from, size, DMA_TO_DEVICE);
+ int rc ;
+ TRACEN(k_t_dmacopy,"(>)") ;
+ maybe_msync() ;
+ /* Counter counts down from size+idle; reaching the idle value signals
+  * that all 'size' bytes have been received. */
+ DMA_CounterSetValueHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,size+k_counter_idle_value) ;
+ show_injection_fifo_state(dma_tcp, counter_index) ;
+ DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ;
+ show_injection_fifo_state(dma_tcp, counter_index) ;
+ maybe_msync() ;
+ DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, dma_tcp->injCounterId, 0xffffffff );
+ _bgp_msync() ;
+ rc= instrument_copy_user_address(dma_tcp,counter_index,to,size,fromAddr,(void *)from,c) ;
+ TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+ return rc ;
+}
+/* Single-page variant of instrument_copy_user: same counter priming and
+ * fence ordering, but drives the singlepage copy path.
+ * NOTE(review): as above, no dma_unmap_single for fromAddr is visible. */
+static int instrument_copy_user_singlepage(void * to, const void * from, unsigned long size,unsigned int counter_index,copy_op_t *c)
+{
+ dma_tcp_t * dma_tcp=&dma_tcp_state ;
+ dma_addr_t fromAddr = dma_map_single(NULL, (void *)from, size, DMA_TO_DEVICE);
+ int rc ;
+ TRACEN(k_t_dmacopy,"(>)") ;
+ maybe_msync() ;
+ show_injection_fifo_state(dma_tcp, counter_index) ;
+ /* Counter counts down from size+idle; reaching the idle value signals
+  * completion. */
+ DMA_CounterSetValueHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,size+k_counter_idle_value) ;
+ show_injection_fifo_state(dma_tcp, counter_index) ;
+ DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ;
+ show_injection_fifo_state(dma_tcp, counter_index) ;
+ maybe_msync() ;
+ DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, dma_tcp->injCounterId, 0xffffffff );
+ _bgp_msync() ;
+ rc= instrument_copy_user_address_singlepage(dma_tcp,counter_index,to,size,fromAddr,from,c) ;
+ TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+ return rc ;
+}
+/* Reserve a reception counter, run the (page-crossing-capable) copy, and
+ * release the counter. Returns non-zero when no counter was available or
+ * the copy itself failed. */
+static int instrument_copy_tofrom_user(void * to, const void * from, unsigned long size,copy_op_t *c)
+{
+ int counter_index=acquire_counter() ;
+ int rc ;
+ TRACEN(k_t_dmacopy,"(>) to=%p from=%p size=0x%08lx counter_index=%d",to,from,size,counter_index) ;
+ if( counter_index < 0)
+ {
+ dma_memcpy_statistic(k_copy_no_counter_rejects) ;
+ rc=1 ;
+ }
+ else
+ {
+ rc= instrument_copy_user(to,from,size,counter_index,c) ;
+ release_counter(counter_index) ;
+ }
+ TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+ return rc ;
+}
+
+/* Single-page counterpart of instrument_copy_tofrom_user: reserve a
+ * reception counter, run the singlepage copy, release the counter. */
+static int instrument_copy_tofrom_user_singlepage(void *to, const void * from, unsigned long size,copy_op_t *c)
+{
+ int counter_index=acquire_counter() ;
+ int rc ;
+ TRACEN(k_t_dmacopy,"(>) to=%p from=%p size=0x%08lx counter_index=%d",to,from,size,counter_index) ;
+ if( counter_index < 0)
+ {
+ dma_memcpy_statistic(k_copy_no_counter_rejects) ;
+ rc=1 ;
+ }
+ else
+ {
+ rc= instrument_copy_user_singlepage(to,from,size,counter_index,c) ;
+ release_counter(counter_index) ;
+ }
+ TRACEN(k_t_dmacopy,"(<) rc=%d",rc) ;
+ return rc ;
+}
+
+/* Probe every page of [address, address+size) with get_user to confirm it
+ * is mapped for reading. Returns 1 if any page is NOT mapped, 0 if all are
+ * (kernel addresses are assumed OK). Note the inverted sense: 0 means OK.
+ * Callers guarantee size > 0 (the DMA threshold check), so address+size-1
+ * cannot underflow. */
+static int all_pages_mapped_read(unsigned long address, unsigned long size)
+{
+ unsigned int start_page=(address >> k_page_shift) ;
+ /* Fix: use address+size-1 so a region ending exactly on a page boundary
+  * does not probe one page beyond the buffer (consistent with the page
+  * arithmetic in instrument_copy_user_address_within_page). */
+ unsigned int end_page=((address+size-1) >> k_page_shift) ;
+ unsigned int page_count = end_page-start_page+1 ;
+ unsigned int x ;
+ if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK
+ /* Defend against the possibility that the user application has posted an unmapped address */
+ for(x=0;x<page_count;x+=1)
+ {
+ int pageInt ;
+ int __user * pageIntP = (int __user *) ((start_page+x) << k_page_shift) ;
+ if( get_user(pageInt,pageIntP) )
+ {
+ TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+ if( k_diag_not_mapped)
+ {
+ tlb_t t ;
+ unsigned int r=v_to_r_maybe((void *)address, &t) ;
+ TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+ TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+ diagnose_tlb(&t) ;
+ }
+
+ return 1;
+ }
+
+ }
+ return 0 ;
+}
+
+/* Probe every page of [address, address+size) with put_user to confirm it
+ * is mapped for writing. Each probe writes one zero byte into the
+ * destination (which is about to be overwritten by the copy anyway).
+ * Returns 1 if any page is NOT mapped, 0 if all are (kernel addresses are
+ * assumed OK).
+ * Fixes: (1) the first failure trace used loop variable 'x' before it was
+ * initialised (undefined behaviour) - the first probe is page 0, so the
+ * trace now uses start_page directly; (2) end_page is computed from
+ * address+size-1 so a region ending on a page boundary does not probe one
+ * page past the buffer. */
+static int all_pages_mapped_write(unsigned long address, unsigned long size)
+{
+ unsigned int start_page=(address >> k_page_shift) ;
+ unsigned int end_page=((address+size-1) >> k_page_shift) ;
+ unsigned int page_count = end_page-start_page+1 ;
+ unsigned int x ;
+ char __user * pageCharP = (char __user *) address ;
+ if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK
+ if(put_user(0,pageCharP))
+ {
+ TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",(start_page << k_page_shift),start_page,page_count,is_kernel_addr((start_page << k_page_shift))) ;
+ if( k_diag_not_mapped)
+ {
+ tlb_t t ;
+ unsigned int r=v_to_r_maybe((void *)address, &t) ;
+ TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",(start_page << k_page_shift),start_page,page_count,is_kernel_addr((start_page << k_page_shift))) ;
+ TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+ diagnose_tlb(&t) ;
+ }
+
+ return 1;
+ }
+ /* Defend against the possibility that the user application has posted an unmapped address */
+ for(x=1;x<page_count;x+=1)
+ {
+ char __user * pageCharP = (char __user *) ((start_page+x) << k_page_shift) ;
+/* TODO: Fix this up against the possibility of 0..2 bytes at the start of the last page */
+ if( put_user(0,pageCharP) )
+ {
+ TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+ if( k_diag_not_mapped)
+ {
+ tlb_t t ;
+ unsigned int r=v_to_r_maybe((void *)address, &t) ;
+ TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x is_kernel_addr=%d",((start_page+x) << k_page_shift),start_page,page_count,is_kernel_addr(((start_page+x) << k_page_shift))) ;
+ TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+ diagnose_tlb(&t) ;
+ }
+
+ return 1;
+ }
+
+ }
+ return 0 ;
+}
+
+/* Top-level entry for DMA-accelerated copy_tofrom_user.
+ * Returns 0 when the copy was done by DMA; returns 1 when it was NOT done
+ * and the caller must fall back to an ordinary copy (size under threshold,
+ * unmapped pages, no free counter, page-crossing disabled, or a checksum
+ * mismatch during diagnosis).
+ * NOTE(review): the header comment above says "1 if we could DMA-copy" but
+ * every code path returns 1 on the reject/fallback side - confirm the
+ * intended sense is '1 = not copied'. */
+extern unsigned long bgp_dma_instrument_copy_tofrom_user(void *to,
+ const void *from, unsigned long size)
+{
+ TRACEN(k_t_general,"to=%p from=%p size=0x%08lx",to,from,size) ;
+ dma_memcpy_statistic(k_copy_tofrom_user_calls) ;
+ if( size > 0 && size >= bgp_memcpy_control.dma_threshold )
+ {
+ copy_op_t c ;
+ TRACEN(k_t_dmacopy,"to=%p from=%p size=0x%08lx",to,from,size) ;
+ /* Probe that source and (optionally) target pages are mapped before
+  * committing the DMA unit to the transfer. */
+ if( all_pages_mapped_read((unsigned long) from,size))
+ {
+ dma_memcpy_statistic(k_copy_source_rejects) ;
+ return 1 ;
+ }
+ if( k_map_write_check && all_pages_mapped_write((unsigned long) to,size))
+ {
+ dma_memcpy_statistic(k_copy_target_rejects) ;
+ return 1 ;
+ }
+ /* Record pre-copy checksums of both regions so a faulty copy can be
+  * diagnosed afterwards; 0xffffffff marks 'not computed'. */
+ if( k_diagnose)
+ {
+ c.to_vaddr=to ;
+ c.from_vaddr=(void *)from ;
+ c.size=size ;
+ c.frag_index=0 ;
+ c.from_check_post = 0xffffffff ;
+ c.to_check_pre = 0xffffffff ;
+ c.to_check_post = 0xffffffff ;
+ if(k_fromcheck_pre)
+ {
+ c.from_check_pre=region_check((void *)from,size) ;
+ }
+ else
+ {
+ c.from_check_pre = 0xffffffff ;
+ }
+ if(k_tocheck_pre)
+ {
+ c.to_check_pre=region_check(to,size) ;
+ }
+ else
+ {
+ c.to_check_pre = 0xffffffff ;
+ }
+ }
+
+
+ if( crosses_page_boundary(from,size) || crosses_page_boundary(to,size))
+ {
+ if( bgp_memcpy_control.handle_pagecrossing)
+ {
+
+ unsigned long rc= instrument_copy_tofrom_user(to,from,size,&c) ;
+ dma_memcpy_statistic((0==rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ;
+ TRACEN(k_t_dmacopy,"rc=%ld",rc) ;
+ /* Cross-check post-copy checksums: source must be unchanged,
+  * and destination must now match source. Any disagreement is
+  * diagnosed and reported as 'not copied'. */
+ if(k_diagnose && 0 == rc )
+ {
+ if(k_fromcheck_post)
+ {
+ c.from_check_post=region_check(from,size) ;
+ }
+ if(k_tocheck_post)
+ {
+ c.to_check_post=region_check(to,size) ;
+ }
+ if( (k_fromcheck_pre && k_fromcheck_post && c.from_check_post != c.from_check_pre)
+ ||
+ (k_fromcheck_pre && k_tocheck_post && c.from_check_pre != c.to_check_post)
+ ||
+ (k_fromcheck_post && k_tocheck_post && c.from_check_post != c.to_check_post)
+ )
+ {
+ diagnose_faulty_copy(&c) ;
+ return 1 ;
+ }
+ }
+ return rc ;
+ }
+ else
+ {
+ dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ;
+ return 1 ;
+ }
+ }
+ else
+ {
+ {
+ unsigned long rc= instrument_copy_tofrom_user_singlepage(to,from,size,&c) ;
+ dma_memcpy_statistic((0==rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ;
+ TRACEN(k_t_dmacopy,"rc=%ld",rc) ;
+ /* Same post-copy checksum cross-check as the page-crossing path. */
+ if(k_diagnose && 0 == rc )
+ {
+ if(k_fromcheck_post)
+ {
+ c.from_check_post=region_check(from,size) ;
+ }
+ if(k_tocheck_post)
+ {
+ c.to_check_post=region_check(to,size) ;
+ }
+ if( (k_fromcheck_pre && k_fromcheck_post && c.from_check_post != c.from_check_pre)
+ ||
+ (k_fromcheck_pre && k_tocheck_post && c.from_check_pre != c.to_check_post)
+ ||
+ (k_fromcheck_post && k_tocheck_post && c.from_check_post != c.to_check_post)
+ )
+ {
+ diagnose_faulty_copy(&c) ;
+ return 1 ;
+ }
+ }
+
+ return rc ;
+ }
+
+ }
+ }
+ dma_memcpy_statistic(k_copy_size_rejects) ;
+ return 1 ; // Not copied, size under threshold
+
+}
+
+/* sysctl entries exposing the four per-core counter-allocation records
+ * (counter_allocation[0..3]) for inspection/tuning.
+ * NOTE(review): proc_dointvec with maxlen = sizeof(core_counter_allocation_t)
+ * treats the whole struct (including its atomics) as an array of ints -
+ * confirm that layout assumption holds. */
+static struct ctl_table dma_memcpy_table[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "counter_allocation_0",
+ .data = counter_allocation+0,
+ .maxlen = sizeof(core_counter_allocation_t),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "counter_allocation_1",
+ .data = counter_allocation+1,
+ .maxlen = sizeof(core_counter_allocation_t),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "counter_allocation_2",
+ .data = counter_allocation+2,
+ .maxlen = sizeof(core_counter_allocation_t),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "counter_allocation_3",
+ .data = counter_allocation+3,
+ .maxlen = sizeof(core_counter_allocation_t),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ { 0 },
+} ;
+
+/* sysctl directory path for the table above: /proc/sys/bgp/dmacopy/ */
+static struct ctl_path dma_memcpy_ctl_path[] = {
+ { .procname = "bgp", .ctl_name = 0, },
+ { .procname = "dmacopy", .ctl_name = 0, },
+ { },
+};
+
+/* Register the counter-allocation sysctl entries and mark every per-core
+ * reception counter as free (count 0, all in_use atomics cleared). */
+static void __init
+bgp_dma_memcpy_init_counter_allocation(void)
+{
+ unsigned int core ;
+ register_sysctl_paths(dma_memcpy_ctl_path,dma_memcpy_table) ;
+ for(core=0;core<k_injecting_cores;core+=1)
+ {
+ core_counter_allocation_t * alloc = counter_allocation + core ;
+ unsigned int slot ;
+ alloc->count = 0;
+ for(slot=0;slot<k_counters_per_core;slot+=1)
+ {
+ atomic_set(alloc->in_use+slot,0) ;
+ }
+ }
+ TRACEN(k_t_init,"counter_allocation initialised") ;
+}
+
+/* This gets driven in the FLIH when a DMA interrupt occurs */
+/* Placeholder handler for the memcpy reception-counter group: no interrupt
+ * is expected from these counters, so any arrival is logged as an error.
+ * The four arguments are the standard counter-handler signature; unused. */
+static void dummyCounterZeroHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+{
+ TRACEN(k_t_error,"(>) Unexpected interrupt" );
+ TRACEN(k_t_error,"(<)" );
+}
+
+/* 'copyin/out' via the BGP DMA is believed functional, but seems not useful since copying via the parallel FP regs */
+/* seems to run faster, even in cases where that wipes out the L1 cache. Code is left here in case someone wants to */
+/* try improving it, and to indicate which sections of the BGP DMA unit (injection fifo and reception counters) are needed */
+/* to make it work. */
+/* One-time init: allocate and initialise one injection fifo per injecting
+ * core (not connected to the torus) and a reception counter group for the
+ * memcpy engine. */
+void __init
+bgp_dma_memcpyInit(dma_tcp_t * dma_tcp)
+{
+ bgp_dma_memcpy_init_counter_allocation() ;
+ /* Describe the per-core injection fifos: priority 0, local (loopback),
+  * id = core index, not mapped to any torus direction. */
+ {
+ int counter_index ;
+ for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1 )
+ {
+ dma_tcp->memcpyInjFifoFramesPri[ counter_index ] = 0 ;
+ dma_tcp->memcpyInjFifoFramesLoc[ counter_index ] = 1 ;
+ dma_tcp->memcpyInjFifoFramesIds[ counter_index ] = counter_index ;
+ dma_tcp->memcpyInjFifoFramesMap[ counter_index ] = 0; /* 'memcpy' injector not connected to torus */
+ }
+ }
+ {
+ int ret = DMA_InjFifoGroupAllocate( k_InjectionFifoGroupMemcpy,
+ k_injecting_cores, /* num inj fifos */
+ dma_tcp->memcpyInjFifoFramesIds,
+ dma_tcp->memcpyInjFifoFramesPri,
+ dma_tcp->memcpyInjFifoFramesLoc,
+ dma_tcp->memcpyInjFifoFramesMap,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ & dma_tcp->memcpyInjFifoGroupFrames );
+
+ TRACEN(k_t_init,"(=)DMA_InjFifoGroupAllocate rc=%d", ret );
+
+ if( 0 == ret)
+ {
+ int counter_index ;
+ for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1 )
+ {
+ TRACEN(k_t_init,"fg_ptr=%p fifo_id=%d va_start=%p va_head=%p va_end=%p",
+ &dma_tcp->memcpyInjFifoGroupFrames,
+ dma_tcp->memcpyInjFifoFramesIds[counter_index],
+ dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo,
+ dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo,
+ dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo+1
+ ) ;
+ /* NOTE(review): this inner 'ret' shadows the outer one; the
+  * init result is only traced, never propagated. */
+ {
+ int ret = DMA_InjFifoInitById( &dma_tcp->memcpyInjFifoGroupFrames,
+ dma_tcp->memcpyInjFifoFramesIds[counter_index],
+ dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo,
+ dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo, /* head */
+ dma_tcp->idma.idma_core[counter_index].memcpy_packet_fifo+1 /* end */
+ );
+
+ dma_tcp->idma.idma_core[counter_index].memcpy_fifo_initial_head =
+ (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->memcpyInjFifoGroupFrames, dma_tcp->memcpyInjFifoFramesIds[counter_index]) ;
+ TRACEN(k_t_init,"(=)DMA_InjFifoInitById rc=%d initial_head=0x%08x", ret , dma_tcp->idma.idma_core[counter_index].memcpy_fifo_initial_head);
+ }
+ }
+ }
+ /* Set up a reception counter for 'memcpy' */
+ {
+ /* Initialize reception counter group */
+ int ret __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Reception,
+ k_ReceptionCounterGroupMemcpy, /* group number */
+ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP,
+ dma_tcp->memcpyRecCntrSubgrps,
+/* TODO: Not really taking interrupts from this counter group, but maybe it has to be coherent ? */
+// 0, /* target core for interrupts */
+// NULL, /* Not planning to take interrupts from memcpy counters */
+ 2, /* target core for interrupts */
+ dummyCounterZeroHandler,
+ NULL,
+ NULL,
+ & dma_tcp->memcpyRecCounterGroup );
+ TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret );
+ }
+/* { */
+/* int counter_index ; */
+/* for( counter_index=0; counter_index< DMA_NUM_COUNTERS_PER_GROUP; counter_index += 1 ) */
+/* { */
+/* DMA_CounterSetDisableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; */
+/* DMA_CounterSetValueBaseMaxHw(dma_tcp->memcpyRecCounterGroup.counter[counter_index].counter_hw_ptr,k_counter_idle_value,0,0xffffffff) ; */
+/* } */
+/* _bgp_msync() ; */
+/* // for( counter_index=0; counter_index< k_injecting_cores; counter_index += 1 ) */
+/* // { */
+/* // DMA_CounterSetEnableById(&dma_tcp->memcpyRecCounterGroup,counter_index) ; */
+/* // } */
+/* _bgp_msync() ; */
+/* } */
+
+
+
+}
+}
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp.c b/drivers/net/bgp_torus/bgp_dma_tcp.c
new file mode 100644
index 00000000000000..9e63e4e664db51
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp.c
@@ -0,0 +1,931 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *
+ * Intent: Send a 'request block' to the partner's memory FIFO
+ * Partner initiates a 'remote read' from me
+ * Partner sends a 'response block' to my FIFO to say the data is transferred
+ *
+ ********************************************************************/
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/bootmem.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+#include <net/tcp_hiatus.h>
+
+#include <spi/linux_kernel_spi.h>
+
+#include "bgp_dma_tcp.h"
+
+#include "bgp_bic_diagnosis.h"
+#include "../bgp_network/bgdiagnose.h"
+
+/* #define TRUST_TORUS_CRC */
+
+#define SEND_SHORT_FRAMES_INLINE
+#define ENABLE_TUNING
+
+#define ENABLE_LEARNING_ADDRESSES
+
+#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI)
+/* Select operation with linux 'dev->poll' */
+#define TORNIC_DEV_POLL
+
+/* #if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR) && !defined(CONFIG_BGP_VRNIC) */
+/* #define TORNIC_STEAL_POLL_CORE */
+/* #endif */
+
+#endif
+
+
+/* #define REQUIRES_DUMPMEM */
+
+/* #if defined(CONFIG_BLUEGENE_TORUS_TRACE) */
+/* int bgp_dma_tcp_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+int bgp_dma_tcp_tracemask = k_t_init | k_t_request | k_t_error | k_t_congestion ; // | k_t_scattergather ;
+/* int bgp_dma_tcp_tracemask = k_t_init | k_t_request | k_t_error | k_t_congestion |k_t_irqflow|k_t_irqflow_rcv; */
+/* int bgp_dma_tcp_tracemask = 0xffffffff ; */
+/* int bgp_dma_tcp_tracemask = k_t_request | k_t_error ; */
+/* #endif */
+
+/* extern int sysctl_somaxconn ; // listening socket backlog, will want to increase this to allow at least 'n' SYNs per node in the block */
+/* #define DEBUG_CLEAR_SKB */
+
+//extern int bgp_dma_irq ; /* Interrupt number that the torus is using */
+
+enum {
+ k_fifo_irq = 124 , /* Linux interrupt number for 'fifo threshold crossing' interrupt */
+ k_rec_counter_irq = 132 /* Linux interrupt number for 'reception counter hit zero' interrupt */
+};
+
+enum {
+ k_find_source_of_rst_flags = 1 /* Whether to enable making a fuss about the source of a 'rst' frame */
+};
+
+#if defined(CONFIG_SMP) && !defined(CONFIG_BLUEGENE_UNIPROCESSOR)
+#define TORNIC_TORUS_AFFINITY
+#endif
+
+enum {
+ k_TorusAffinityCPU =
+#if defined(TORNIC_TORUS_AFFINITY)
+ 2
+#else
+ 0
+#endif
+};
+
+extern cpumask_t cpu_nouser_map; /* Added to support 'steal' of core prior to long-running softirq */
+
+int __init
+dma_tcp_module_init (void);
+/* void __exit dma_tcp_module_cleanup (void); */
+
+/* module_init(dma_tcp_module_init); */
+/* module_exit(dma_tcp_module_cleanup); */
+
+#if defined(CONFIG_BGP_STATISTICS)
+int rtt_histogram[33] ;
+int transit_histogram[33] ;
+#endif
+
+
+MODULE_DESCRIPTION("BG/P sockets over torus DMA driver");
+MODULE_LICENSE("GPL");
+
+
+#define TCP_DMA_NAME "tcp_bgp_dma"
+#ifndef CTL_UNNUMBERED
+#define CTL_UNNUMBERED -2
+#endif
+
+/* Routines related to interrupt management from bgp_bic.c */
+void bic_disable_irq(unsigned int irq) ; /* Intended to be called from a FLIH to indicate that this interrupt will not fire again */
+void bic_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ; /* Intended to indicate which core will take the next interrupt of this type. Doesn't explocitly enable but other async things may enable */
+void bic_unmask_irq(unsigned int irq) ; /* Explicitly enable this interrupt */
+
+
+
+#define ENABLE_TIMESTAMP_TRACKING
+enum {
+ k_FLIH_Entry ,
+ k_FLIH_Exit ,
+ k_SLIH_Entry ,
+ k_SLIH_Exit ,
+ k_Poll_Entry ,
+ k_Poll_Exit ,
+ k_Enable ,
+ k_CouldEnable ,
+ k_Quantity
+};
+
+static char *timestamp_names[] = {
+ "k_FLIH_Entry" ,
+ "k_FLIH_Exit" ,
+ "k_SLIH_Entry" ,
+ "k_SLIH_Exit" ,
+ "k_Poll_Entry" ,
+ "k_Poll_Exit" ,
+ "k_Enable" ,
+ "k_CouldEnable"
+};
+
+typedef struct {
+ unsigned int hi ;
+ unsigned int lo ;
+} timestamp_t ;
+
+#if defined(ENABLE_TIMESTAMP_TRACKING)
+enum {
+ k_TimestampRingSize = 8
+};
+
+typedef struct {
+ unsigned int current_index ;
+ timestamp_t timestamp[k_TimestampRingSize] ;
+} timestamp_ring_t;
+
+static timestamp_ring_t timestamp_ring[k_Quantity] ;
+#endif
+
+/* Record the current timebase into the ring buffer for event class x
+ * (k_FLIH_Entry, k_SLIH_Exit, ...). TBU is read twice around TBL to
+ * detect a low-word wrap between the reads; on a carry the low word is
+ * recorded as 0 rather than pairing mismatched halves. Compiles to a
+ * no-op unless ENABLE_TIMESTAMP_TRACKING is defined. */
+static void record_timestamp(unsigned int x)
+ {
+#if defined(ENABLE_TIMESTAMP_TRACKING)
+ unsigned int tbhi = get_tbu();
+ unsigned int tblo = get_tbl();
+ unsigned int tbhi2 = get_tbu();
+ unsigned int tblo2 = ( tbhi == tbhi2 ) ? tblo : 0 ;
+ timestamp_ring_t *tr = timestamp_ring+x ;
+ unsigned int cx=tr->current_index ;
+ unsigned int cxm=cx&(k_TimestampRingSize-1) ; /* ring size is a power of 2 */
+ tr->timestamp[cxm].hi = tbhi2 ;
+ tr->timestamp[cxm].lo = tblo2 ;
+ TRACEN(k_t_detail,"Timestamp %s[%d] = 0x%08x%08x",timestamp_names[x],cx,tbhi2,tblo2) ;
+ tr->current_index=cx+1 ;
+#endif
+ }
+
+/* Dump the most recent k_TimestampRingSize timestamps for every event
+ * class, oldest first (q counts -ringsize..-1 relative to the current
+ * index). Diagnostic only; no-op unless ENABLE_TIMESTAMP_TRACKING. */
+static void show_timestamps(void)
+ {
+#if defined(ENABLE_TIMESTAMP_TRACKING)
+ int x ;
+ TRACEN(k_t_detail,"(>)") ;
+ for(x=0;x<k_Quantity;x+=1)
+ {
+ timestamp_ring_t *tr = timestamp_ring+x ;
+ unsigned int cx=tr->current_index ;
+ int q ;
+ for(q=-k_TimestampRingSize;q<0 ; q+=1)
+ {
+ unsigned int cxm=(cx+q)&(k_TimestampRingSize-1) ;
+ TRACEN(k_t_request,"Timestamp %s[%03d] = 0x%08x%08x",timestamp_names[x],q,tr->timestamp[cxm].hi,tr->timestamp[cxm].lo) ;
+ }
+ }
+ TRACEN(k_t_detail,"(<)") ;
+#endif
+ }
+
+/* Set the runtime tuning parameters of the driver to their defaults. The
+ * values are empirically chosen (several carry their history in comments)
+ * and can be adjusted later; which defaults apply depends on the NAPI /
+ * core-stealing configuration selected at build time. */
+static void init_tuning(dma_tcp_t *dma_tcp)
+ {
+#if defined(CONFIG_BLUEGENE_TCP)
+ dma_tcp->bluegene_tcp_is_built = 1 ;
+#else
+ dma_tcp->bluegene_tcp_is_built = 0 ;
+#endif
+ dma_tcp->tuning_num_packets = 0x7fffffff ; /* up from '1', used 16 at one time */
+#if defined(KEEP_TCP_FLAG_STATS)
+ dma_tcp->tcp_received_flag_count[0] = 0 ;
+ dma_tcp->tcp_received_flag_count[1] = 0 ;
+ dma_tcp->tcp_received_flag_count[2] = 0 ;
+ dma_tcp->tcp_received_flag_count[3] = 0 ;
+ dma_tcp->tcp_received_flag_count[4] = 0 ;
+ dma_tcp->tcp_received_flag_count[5] = 0 ;
+ dma_tcp->tcp_received_flag_count[6] = 0 ;
+ dma_tcp->tcp_received_flag_count[7] = 0 ;
+#endif
+#if defined(TORNIC_DEV_POLL)
+#if defined(TORNIC_STEAL_POLL_CORE)
+ /* dma_tcp->tuning_num_empty_passes = 1000000 ; // Try 1 second 'spin' if no data coming */
+ dma_tcp->tuning_num_empty_passes = 5000 ; /* Try 5 millisecond 'spin' if no data coming if we have a whole core for it */
+ dma_tcp->tuning_non_empty_poll_delay = 850 ;
+#else
+ /* Sharing a core, but with 'poll' NAPI */
+ dma_tcp->tuning_num_empty_passes = 1 ; /* Try 10 microsecond 'spin' if no data coming if we are sharing core with app */
+ dma_tcp->tuning_non_empty_poll_delay = 1 ;
+#endif
+#else
+ /* 'interrupts' NAPI */
+ dma_tcp->tuning_num_empty_passes = 1 ; /* Try 10 microsecond 'spin' if no data coming if we are sharing core with app */
+ dma_tcp->tuning_non_empty_poll_delay = 1 ;
+#endif
+ dma_tcp->tuning_poll_after_enabling = 1 ; /* changed from 0 on 20080619 */
+ dma_tcp->tuning_run_handler_on_hwi = 0 ; /* was 1 */
+ dma_tcp->tuning_clearthresh_slih = 1 ; /* = 0 , whether to clear the 'threshold crossed' bit in the slih */
+ dma_tcp->tuning_clearthresh_flih = 0 ; /* = 0 , whether to clear the 'threshold crossed' bit in the flih */
+ dma_tcp->tuning_disable_in_dcr = 1 ; /* = 1, whether to toggle the DCR interrupt enable/disable */
+ dma_tcp->tuning_exploit_reversepropose = 1 ; /* which way to run the propose/accept protocol */
+ dma_tcp->tuning_counters_per_source = 0 ; /* Max reception counters to commit per source node (0 indicates to use 'shareout' algorithm */
+ dma_tcp->tuning_min_icsk_timeout = 200 ; /* Push TCP timeout on torus up to 200 jiffies, we think we have a reliable network ... */
+ dma_tcp->tuning_injection_hashmask = 3 ; /* = 3, whether to mask down the number of injection fifos per direction */
+ dma_tcp->tuning_virtual_channel = k_VC_anyway ; /* Select adaptive routing at boot time */
+ }
+
+dma_tcp_t dma_tcp_state ;
+
+
+/* void __exit */
+/* dma_tcp_module_cleanup (void) */
+/* { */
+// /* nothing to do */
+/* } */
+
+
+
+/* #if defined(CONFIG_BLUEGENE_TCP) */
+#if 1
+/* Drain the reception FIFO: poll up to tuning_num_packets packets through
+ * bgp_dma_tcp_empty_fifo_callback, with the spin/delay behaviour set by
+ * the tuning parameters. Returns the result of
+ * DMA_RecFifoPollNormalFifoById. */
+static int bgp_dma_tcp_poll(dma_tcp_t *) ;
+static int bgp_dma_tcp_poll(dma_tcp_t *dma_tcp)
+{
+/* Values when I inherited the code, now taken from 'tuning params' */
+/* int num_packets = 1; // received packets one by one */
+/* int num_empty_passes = 512; */
+/* int non_empty_poll_delay = 850; */
+/* Other values I have tried */
+/* int num_packets = 100; */
+/* int num_empty_passes = 0; */
+/* int non_empty_poll_delay = 0; */
+/* int num_packets = 100; // received packets 100 at a time */
+/* int num_empty_passes = 5; */
+/* int non_empty_poll_delay = 10; */
+/* dumpmem(dma_tcp_state.receptionFIFO,128,"Reception memory FIFO") ; */
+
+ int ret ;
+ TRACEN(k_t_irqflow, "(>) tuning_num_packets=%d tuning_num_empty_passes=%d tuning_non_empty_poll_delay=%d",
+ dma_tcp->tuning_num_packets,dma_tcp->tuning_num_empty_passes,dma_tcp->tuning_non_empty_poll_delay );
+ dma_tcp->device_stats = bgtornet_stats() ;
+ ret = DMA_RecFifoPollNormalFifoById( dma_tcp->tuning_num_packets,
+ recFifoId,
+ dma_tcp->tuning_num_empty_passes,
+ dma_tcp->tuning_non_empty_poll_delay,
+ dma_tcp->recFifoGroup,
+ bgp_dma_tcp_empty_fifo_callback);
+ touch_softlockup_watchdog() ; /* If we get a continuous stream of packets, we do not really want the softlockup watchdog to bark */
+ TRACEN(k_t_irqflow, "(<) ret=%d",ret );
+/* ASSERT( ret >= 0 ); */
+ return ret;
+}
+
+
+/* Mask reception-fifo threshold interrupts at the DMA (DCR 0xd71 := 0) */
+static void recfifo_disable(void)
+  {
+    TRACEN(k_t_detail,"(><)") ;
+    mtdcrx(0xd71,0) ;
+  }
+
+/* Unmask reception-fifo threshold interrupts at the DMA (DCR 0xd71, top bit only) */
+static void recfifo_enable(void)
+  {
+    TRACEN(k_t_detail,"(><)") ;
+    record_timestamp(k_Enable) ;
+    mtdcrx(0xd71,0x80000000) ;
+  }
+
+/* Mask reception-counter hit-zero interrupts at the DMA (DCR 0xd7a := 0) */
+static void reccounter_disable(void)
+  {
+    TRACEN(k_t_detail,"(><)") ;
+    mtdcrx(0xd7a,0) ;
+  }
+
+/* Unmask reception-counter hit-zero interrupts for all 32 counters (DCR 0xd7a := all ones) */
+static void reccounter_enable(void)
+  {
+    TRACEN(k_t_detail,"(><)") ;
+    record_timestamp(k_Enable) ;
+    mtdcrx(0xd7a,0xffffffff) ;
+  }
+
+/* Second-level interrupt handler (tasklet body): polls the reception FIFO, clears */
+/* the 'threshold crossed' latch so the same event does not re-interrupt, re-arms the */
+/* missed-interrupt watchdog, and (unless TORNIC_DEV_POLL) re-enables both DCR */
+/* interrupt sources. 'dummy' is the unused tasklet data word. */
+static void dma_tcp_slih_handler(unsigned long dummy)
+  {
+    int ret;
+    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+    record_timestamp(k_SLIH_Entry) ;
+
+    TRACEN(k_t_irqflow,"(>)" );
+    enable_kernel_fp() ; /* NOTE(review): presumably the poll path uses FP registers for packet copying -- confirm */
+    ret = bgp_dma_tcp_poll(dma_tcp);
+    /* Clear the 'threshold crossed' flag so we don't automatically reinterrupt */
+    DMA_RecFifoSetClearThresholdCrossed( dma_tcp_state.recFifoGroup,
+                                         0x80000000,
+                                         0 );
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+    mod_timer(&dma_tcp->torus_missed_interrupt_timer, jiffies+200) ; /* Cause timer interrupt after 2000ms if things don't stay alive ... temp while diagnosing problem ... */
+#endif
+    record_timestamp(k_SLIH_Exit) ;
+#if !defined(TORNIC_DEV_POLL)
+    recfifo_enable() ;
+    reccounter_enable() ;
+#endif
+    TRACEN(k_t_irqflow,"(<)" );
+  }
+
+/* Check for signs of a lost reception interrupt: if either DCR interrupt source is */
+/* masked, the BIC target registers are not at their expected values (0x00006000), or */
+/* the reception fifo is not empty, trace the state and run the SLIH by hand. */
+static void trip_missed_interrupt(dma_tcp_t *dma_tcp)
+{
+  unsigned int fifo_dcr = mfdcrx(0xd71) ;
+  unsigned int counter_dcr = mfdcrx(0xd7a) ;
+  struct bic_regs * bic_regs = bic.regs ;
+  unsigned int target_2_3 = bic_regs->group[2].target[3] ;
+  unsigned int target_3_0 = bic_regs->group[3].target[0] ;
+  unsigned int notEmpty = DMA_RecFifoGetNotEmpty(dma_tcp->recFifoGroup,0) ;
+  unsigned int thresholdCrossed = DMA_RecFifoGetThresholdCrossed(dma_tcp->recFifoGroup,0) ;
+  /* NOTE(review): thresholdCrossed is traced but deliberately not part of the trigger condition */
+  if( fifo_dcr != 0x80000000 || counter_dcr != 0xffffffff || target_2_3 != 0x00006000 || target_3_0 != 0x00006000 || notEmpty != 0 )
+    {
+      TRACEN(k_t_general,"maybe missed interrupt fifo_dcr=0x%08x counter_dcr=0x%08x target_2_3=0x%08x target_3_0=0x%08x notEmpty=0x%08x thresholdCrossed=0x%08x",
+          fifo_dcr,counter_dcr,target_2_3,target_3_0,notEmpty,thresholdCrossed) ;
+      dma_tcp_slih_handler(0) ;
+    }
+}
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+/* Watchdog timer callback: probe for a lost interrupt and re-arm the timer. */
+static void dma_tcp_missed_interrupt(unsigned long dummy)
+{
+  dma_tcp_t *dma_tcp = &dma_tcp_state ;
+  TRACEN(k_t_irqflow,"(>)") ;
+  trip_missed_interrupt(dma_tcp) ;
+  mod_timer(&dma_tcp->torus_missed_interrupt_timer, jiffies+10) ; /* Cause timer interrupt after 100ms if things don't stay alive ... temp while diagnosing problem ... */
+  TRACEN(k_t_irqflow,"(<)") ;
+}
+#endif
+/* Handshake tickets between the poll requester and dma_tcp_poll_handler; volatile */
+/* because they are written and read without locking (NOTE(review): presumably by */
+/* different cores -- confirm the producer of dma_ticket_req, not visible here) */
+static volatile int dma_ticket_req ;
+static volatile int dma_ticket_ack ;
+
+/* Poll entry point: acknowledge the current request ticket and run the SLIH inline. */
+void dma_tcp_poll_handler(void)
+  {
+    int cur_ticket_req = dma_ticket_req ;
+    record_timestamp(k_Poll_Entry) ;
+
+    /* Acknowledge before polling so the requester can see the poll has started */
+    dma_ticket_ack = cur_ticket_req ;
+    TRACEN(k_t_irqflow,"dma_tcp_poll_handler: cur_ticket_req=%d (>)",cur_ticket_req );
+    dma_tcp_slih_handler(0) ;
+    TRACEN(k_t_irqflow,"dma_tcp_poll_handler: cur_ticket_req=%d (<)",cur_ticket_req );
+    record_timestamp(k_Poll_Exit) ;
+  }
+
+/* Re-enable torus reception: unmask both DCR sources, pin both IRQs to the torus */
+/* affinity CPU, unmask them at the BIC under local_irq_save so neither can fire */
+/* between the two unmasks, then check for an interrupt we may already have missed. */
+void dma_tcp_rx_enable(void)
+  {
+    unsigned long flags ;
+    TRACEN(k_t_irqflow,"(>)" );
+    record_timestamp(k_CouldEnable) ;
+    recfifo_enable() ;
+    reccounter_enable() ;
+    bic_set_cpu_for_irq(k_fifo_irq,k_TorusAffinityCPU) ;
+    bic_set_cpu_for_irq(k_rec_counter_irq,k_TorusAffinityCPU) ;
+    /* Both interrupts unmasked before we take one to avoid the chance of an interrupt after the first */
+    /* which (?) could go round the loop and 'do the wrong thing' with respect to napi and enabling the second */
+    /* while trying to run the napi poll */
+    local_irq_save(flags) ;
+    bic_unmask_irq(k_fifo_irq) ;
+    bic_unmask_irq(k_rec_counter_irq) ;
+    local_irq_restore(flags) ;
+    /* If we get here and there's an 'interrupt cause' in the DCRs, we have missed an interrupt. Trace it and fire the SLIH. */
+    trip_missed_interrupt(&dma_tcp_state ) ;
+    TRACEN(k_t_irqflow,"(<)" );
+
+  }
+
+/* Tasklet wrapping the SLIH so the FLIH can defer FIFO draining out of hard-irq context */
+static DECLARE_TASKLET(dma_tcp_slih, dma_tcp_slih_handler,0) ;
+
+/* This gets driven in the FLIH when a DMA interrupt occurs */
+/* Masks both reception IRQs at the BIC and schedules NAPI receive; the poll/SLIH */
+/* path is responsible for unmasking them again (see dma_tcp_rx_enable). */
+static void receiveFLIH(u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+{
+  TRACEN(k_t_irqflow,"(>) FLIH" );
+  record_timestamp(k_FLIH_Entry) ;
+  bic_disable_irq(k_fifo_irq) ;
+  bic_disable_irq(k_rec_counter_irq) ;
+  bgtornet_rx_schedule() ;
+  record_timestamp(k_FLIH_Exit) ;
+  TRACEN(k_t_irqflow,"(<) FLIH" );
+}
+
+/* FLIH for reception-fifo threshold interrupts: mask the fifo DCR source, then defer. */
+static void receiveCommHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+{
+  TRACEN(k_t_irqflow,"(>)" );
+  recfifo_disable() ;
+  receiveFLIH(arg1,arg2,arg3,arg4) ;
+  TRACEN(k_t_irqflow,"(<)" );
+}
+
+/* This gets driven in the FLIH when a DMA interrupt occurs */
+/* FLIH for reception-counter hit-zero interrupts: mask the counter DCR source, then defer. */
+static void receiveCounterZeroHandler(u32 arg1, u32 arg2, u32 arg3, u32 arg4)
+{
+  TRACEN(k_t_irqflow,"(>)" );
+  reccounter_disable() ;
+  receiveFLIH(arg1,arg2,arg3,arg4) ;
+  TRACEN(k_t_irqflow,"(<)" );
+}
+
+
+/* Fallback receive function for memfifo packets whose Func_Id has no registered */
+/* handler: log the software argument (sender X,Y,Z coordinates packed as in SW_Arg) */
+/* at error level and drop the packet. Always returns 0. */
+static int unknownActor(DMA_RecFifo_t *f_ptr,
+           DMA_PacketHeader_t *packet_ptr,
+           void *recv_func_parm,
+           char *payload_ptr,
+           int payload_bytes
+           )
+  {
+    unsigned int sw_arg __attribute__ ((unused)) = packet_ptr->SW_Arg ;
+    unsigned int func_id __attribute__ ((unused)) = packet_ptr->Func_Id ;
+    unsigned int src_x __attribute__ ((unused)) = sw_arg >> 16 ;
+    unsigned int src_y __attribute__ ((unused)) = (sw_arg >> 8) & 0xff ;
+    unsigned int src_z __attribute__ ((unused)) = sw_arg & 0xff ;
+    TRACEN(k_t_error,"(!!!) %08x %02x (%02x,%02x,%02x) payload_ptr=%p payload_bytes=%d", sw_arg,func_id,src_x,src_y,src_z,payload_ptr, payload_bytes );
+    return 0 ;
+  }
+
+/* static char reception_fifo_buffer[k_desired_reception_memory_fifo_size] __attribute__ ((__aligned__(32))) ; */
+/* We need a reception FIFO; we are prepared to compromise on its size */
+/* Allocates the reception memory FIFO, records its address/size in dma_tcp, and */
+/* poisons it with 0xcc so use of unwritten FIFO memory is visible in dumps. */
+static void __init
+dma_tcp_setup_reception_fifo(dma_tcp_t *dma_tcp)
+  {
+    unsigned int allocation_size=k_desired_reception_memory_fifo_size ;
+    void * allocation_address=local_permanent_alloc(k_desired_reception_memory_fifo_size) ;
+    dma_tcp->receptionfifo = allocation_address ;
+    dma_tcp->receptionfifoSize = allocation_size ;
+    /* Must get a memory FIFO area, and it must be L1-aligned */
+    BUG_ON(allocation_address == NULL) ;
+    BUG_ON(0 != (0x1f & (int)allocation_address)) ;
+    /* NOTE(review): this NULL test is dead code after the BUG_ON above */
+    if( allocation_address != NULL )
+      {
+        memset(allocation_address, 0xcc, allocation_size) ;
+      }
+    TRACEN(k_t_init,"reception_fifo address=%p length=%d=0x%08x",allocation_address,allocation_size,allocation_size) ;
+  }
+
+#endif
+
+
+/* Registers the sysctl controlling FPU-assisted memcpy; defined in another file */
+void __init
+bgp_fpu_register_memcpy_sysctl(void) ;
+
+/* Compile-time switch: whether dma_tcp_init brings up the DMA-assisted memcpy engine */
+enum
+{
+  k_enable_dma_memcpy = 1
+} ;
+
+/* One-time driver initialisation from the node 'personality' supplied by microcode. */
+/* Records this node's torus coordinates and the partition extent, derives routing */
+/* parameters, and (on compute nodes, when CONFIG_BLUEGENE_TCP) sets up the */
+/* reception FIFO/fifo group, injection and reception counter groups, the optional */
+/* DMA memcpy engine, and the procfs/devfs entries. */
+static void __init
+dma_tcp_init(dma_tcp_t *dma_tcp, BGP_Personality_t *pers)
+  {
+    int compute_node_count = pers->Network_Config.Xnodes*pers->Network_Config.Ynodes*pers->Network_Config.Znodes ;
+    int i_am_compute_node= ( pers->Network_Config.Rank != pers->Network_Config.IOnodeRank ) ;
+    TRACEN(k_t_init,"(>) PAGE_SHIFT=%d PAGE_SIZE=%lu", PAGE_SHIFT, PAGE_SIZE );
+    bgp_fpu_register_memcpy_sysctl() ;
+    init_tuning(dma_tcp) ;
+    dma_tcp->location.coordinate[0] = pers->Network_Config.Xcoord;
+    dma_tcp->location.coordinate[1] = pers->Network_Config.Ycoord;
+    dma_tcp->location.coordinate[2] = pers->Network_Config.Zcoord;
+    dma_tcp->extent.coordinate[0] = pers->Network_Config.Xnodes;
+    dma_tcp->extent.coordinate[1] = pers->Network_Config.Ynodes;
+    dma_tcp->extent.coordinate[2] = pers->Network_Config.Znodes;
+    dma_tcp->node_count = compute_node_count ;
+    /* NOTE(review): mask arithmetic assumes compute_node_count is a power of two -- confirm */
+    dma_tcp->node_slot_mask = (compute_node_count )-1 ;
+
+    /* Our coordinates packed as x:y:z in 8-bit fields; carried in every packet header */
+    dma_tcp->SW_Arg = (pers->Network_Config.Xcoord << 16)
+                    | (pers->Network_Config.Ycoord << 8)
+                    | (pers->Network_Config.Zcoord) ;
+    /* Linearised rank of this node within the partition, x-major ordering */
+    dma_tcp->src_key = dma_tcp->location.coordinate[0]*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2]
+                      +dma_tcp->location.coordinate[1]*dma_tcp->extent.coordinate[2]
+                      +dma_tcp->location.coordinate[2] ;
+
+    dma_tcp->xbits = fls(pers->Network_Config.Xnodes)-1 ;
+    dma_tcp->ybits = fls(pers->Network_Config.Ynodes)-1 ;
+    dma_tcp->zbits = fls(pers->Network_Config.Znodes)-1 ;
+    /* YKT BGP seems wired so that no partition less than 8x8x8 is a torus in any dimension */
+    dma_tcp->is_torus_x = (pers->Network_Config.Xnodes >= 8 && pers->Network_Config.Ynodes >= 8 && pers->Network_Config.Znodes >= 8) ;
+    dma_tcp->is_torus_y = dma_tcp->is_torus_x ;
+    dma_tcp->is_torus_z = dma_tcp->is_torus_x ;
+    dma_tcp->block_id = pers->Network_Config.BlockID & 0x00ffffff ;
+    dma_tcp->i_am_compute_node = i_am_compute_node ;
+    TRACEN(k_t_init,"SW_Arg=0x%08x rank=%d=0x%08x src_key=0x%08x xbits=%d ybits=%d zbits=%d ",
+        dma_tcp->SW_Arg, pers->Network_Config.Rank, pers->Network_Config.Rank, dma_tcp->src_key,
+        dma_tcp->xbits,dma_tcp->ybits,dma_tcp->zbits );
+
+    /* Default MTU unless one was configured before init ran */
+    if( 0 == dma_tcp->mtu)
+      {
+        bgp_dma_tcp_set_mtu(dma_tcp, 64996) ;
+      }
+
+#if defined(TORUS_RECEIVE_WITH_SLIH)
+#else
+    skb_queue_head_init(&dma_tcp->skb_pool) ;
+    skb_queue_head_init(&dma_tcp->skb_list_free) ;
+#endif
+    /* One injection lock per (core,direction) pair */
+    {
+      int core ;
+      for( core=0; core<k_injecting_cores; core += 1)
+        {
+          int desired_fifo ;
+          for(desired_fifo=0;desired_fifo<k_injecting_directions;desired_fifo+=1)
+            spin_lock_init(&dma_tcp->dirInjectionLock[core*k_injecting_directions+desired_fifo]) ;
+        }
+    }
+
+#if defined(TORUS_RECEIVE_WITH_SLIH)
+#else
+    tasklet_schedule(&pool_filler_slih) ;
+#endif
+
+#if defined(CONFIG_BLUEGENE_TCP)
+    /* Only compute nodes are torus-capable ... */
+    if( pers->Network_Config.Rank != pers->Network_Config.IOnodeRank )
+      {
+
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+        setup_timer(&dma_tcp->torus_missed_interrupt_timer,dma_tcp_missed_interrupt,0) ;
+#endif
+        /* Claim every counter subgroup for both injection and reception */
+        {
+          int subX ;
+          for(subX=0;subX<DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP;subX +=1)
+            {
+              dma_tcp->injCntrSubgrps[ subX ] = subX ;
+              dma_tcp->recCntrSubgrps[ subX ] = subX ;
+            }
+        }
+
+        /* register a receive function for 'unrecognised' memfifo packets */
+        DMA_RecFifoRegisterRecvFunction(unknownActor, dma_tcp, 1, 0);
+
+        dma_tcp_setup_reception_fifo(dma_tcp) ;
+        dma_tcp->recMap.threshold[0] = dma_tcp->receptionfifoSize/16; /* generate interrupts when anything is in the fifo */
+        {
+          int ret __attribute__ ((unused)) = DMA_RecFifoSetMap( &dma_tcp->recMap ); /* fifo 0 will receive packets from everywhere */
+
+          TRACEN(k_t_init,"(=)DMA_RecFifoSetMap rc=%d", ret );
+        }
+        /* Register functions for 'frames' style access */
+        dma_tcp_frames_init(dma_tcp) ;
+
+
+
+        /* set up rec fifo group */
+        dma_tcp->recFifoGroup = DMA_RecFifoGetFifoGroup( k_ReceptionFifoGroup, 0, receiveCommHandler, NULL, NULL, NULL, NULL );
+
+
+        TRACEN(k_t_init,"(=)DMA_RecFifoGetFifoGroup dma_tcp->recFifoGroup=%p", dma_tcp->recFifoGroup );
+
+        /* initialize rec fifo */
+        {
+          int ret __attribute__ ((unused)) = DMA_RecFifoInitById ( dma_tcp->recFifoGroup,
+                   recFifoId,
+                   dma_tcp->receptionfifo,  /* fifo start */
+                   dma_tcp->receptionfifo,  /* fifo head */
+                   dma_tcp->receptionfifo+dma_tcp->receptionfifoSize  /* fifo end */
+                   );
+          TRACEN(k_t_init,"(=)DMA_RecFifoInitById rc=%d", ret );
+        }
+        TRACEN(k_t_general, "(=)(I) testdma: CounterGroupAllocate");
+
+        {
+          /* Initialize injection counter group */
+          int ret __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Injection,
+                   k_InjectionCounterGroup, /* group number */
+                   DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP,
+                   dma_tcp->injCntrSubgrps,
+                   0,  /* target core for interrupts */
+                   NULL,
+                   NULL,
+                   NULL,
+                   & dma_tcp->injCounterGroup );
+
+          TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret );
+        }
+        memset(dma_tcp->inj_skbs,0,DMA_NUM_COUNTERS_PER_GROUP*sizeof(struct sk_buff *)) ;
+
+        /* enable the counter */
+        {
+          int ret;
+          DMA_CounterSetEnableById( & dma_tcp->injCounterGroup,0) ;
+          ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup,0,0xffffffff) ;
+          TRACEN(k_t_general, "(=)(I) testdma: DMA_CounterSetValueWideOpenById ret=%d",ret) ;
+
+        }
+
+#if defined(CONFIG_WRAP_COPY_TOFROM_USER) && defined(CONFIG_BLUEGENE_DMA_MEMCPY)
+        /* TODO: Investigate why 'dma_memcpy' needed to be initialised before 'dma_tcp counters' */
+        if( k_enable_dma_memcpy) bgp_dma_memcpyInit(dma_tcp) ;
+#endif
+        {
+          /* Initialize reception counter group */
+          int ret __attribute__ ((unused)) = DMA_CounterGroupAllocate( DMA_Type_Reception,
+                   k_ReceptionCounterGroup, /* group number */
+                   DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP,
+                   dma_tcp->recCntrSubgrps,
+                   2,  /* target core for interrupts */
+                   receiveCounterZeroHandler,
+                   NULL,
+                   NULL,
+                   & dma_tcp->recCounterGroup );
+          TRACEN(k_t_init,"(=)DMA_CounterGroupAllocate rc=%d", ret );
+        }
+        memset(dma_tcp->recCntrInUse,0,DMA_NUM_COUNTERS_PER_GROUP) ;
+        memset(dma_tcp->rcv_skbs,0,DMA_NUM_COUNTERS_PER_GROUP*sizeof(struct sk_buff *)) ;
+        dma_tcp->qtyFreeRecCounters = 64 ;
+        dma_tcp->scanRecCounter = 0 ;
+        dma_tcp->framesDisposed = 0 ;
+        atomic_set(&dma_tcp->framesProposed, 0 ) ;
+      }
+    dma_tcp_devfs_procfs_init(dma_tcp) ;
+#endif
+    TRACEN(k_t_init,"(<)" );
+
+
+
+  }
+
+/* Public wrapper: set the torus MTU on the global driver state. */
+void bgp_torus_set_mtu(unsigned int mtu)
+  {
+    bgp_dma_tcp_set_mtu(&dma_tcp_state, mtu) ;
+  }
+
+/* Module entry point: fetch this node's personality from microcode and initialise */
+/* the global torus-TCP driver state. Always reports success (returns 0). */
+int __init
+dma_tcp_module_init(void)
+{
+  BGP_Personality_t personality;
+  int rc = 0;
+
+  bluegene_getPersonality(&personality, sizeof(personality));
+  dma_tcp_init(&dma_tcp_state, &personality) ;
+
+  TRACEN(k_t_init, "(I)initDMA finished ret:%d",rc);
+  return rc;
+}
+
+/* Clamp the TCP retransmission timeout for AF_INET connection-oriented sockets */
+/* on the torus: the network is believed reliable, so a short RTO would only cause */
+/* spurious retransmits. Raises icsk_rto up to tuning_min_icsk_timeout jiffies. */
+/* Fix: guard against skb->sk == NULL -- not every transmitted skbuff is owned by */
+/* a local socket (e.g. forwarded or protocol-generated frames). */
+static void fix_retransmit_timeout(struct sk_buff *skb)
+{
+  dma_tcp_t *dma_tcp = &dma_tcp_state ;
+  struct sock *sk = skb->sk ;
+  unsigned int family ;
+  struct inet_sock *inet ;
+  struct inet_connection_sock *icsk ;
+  int is_icsk ;
+  if( NULL == sk )
+    {
+      return ; /* No owning socket; nothing to adjust */
+    }
+  family=sk->sk_family ;
+  inet = inet_sk(sk) ;
+  icsk = inet_csk(sk) ;
+  is_icsk = inet->is_icsk ;
+  TRACEN(k_t_detail,"skb=%p sk=%p sk_family=0x%04x is_icsk=%d",skb,sk,family,is_icsk) ;
+  if( AF_INET == family && is_icsk )
+    {
+      TRACEN(k_t_detail,"icsk_timeout-jiffies=%lu icsk_rto=%u",icsk->icsk_timeout-jiffies,icsk->icsk_rto) ;
+      if( icsk->icsk_rto < dma_tcp->tuning_min_icsk_timeout )
+        {
+          icsk->icsk_rto=dma_tcp->tuning_min_icsk_timeout ;
+        }
+    }
+
+}
+/* Transmit an skbuff over the torus (frames protocol) and release it when sent. */
+/* When RST diagnosis is enabled, first log any outgoing TCP RST with a stack */
+/* back-chain to help identify the sender. Returns the result of */
+/* bgp_dma_tcp_send_and_free_frames. */
+int bgp_dma_tcp_send_and_free( struct sk_buff *skb )
+{
+  int rc ;
+  if( k_find_source_of_rst_flags && dma_tcp_state.tuning_diagnose_rst )
+    {
+      struct ethhdr *eth = (struct ethhdr *)skb->data;
+      unsigned int h_proto = eth->h_proto ;
+      if( ETH_P_IP == h_proto )
+        {
+          /* NOTE(review): the casts below assume headers are contiguous and that the */
+          /* IP header carries no options (tcphdr taken as iph+1) -- diagnosis only */
+          struct iphdr *iph = (struct iphdr *)(eth+1) ;
+          if(IPPROTO_TCP == iph->protocol )
+            {
+              struct tcphdr *tcph = (struct tcphdr *)(iph+1) ;
+              if( tcph->rst)
+                {
+                  TRACEN(k_t_request,"RST on frame to [%02x:%02x:%02x]",
+                      eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ;
+                  show_stack(0,0) ; /* Stack back-chain may help explain why it was sent */
+
+                }
+            }
+
+        }
+
+    }
+  fix_retransmit_timeout(skb) ;
+  rc = bgp_dma_tcp_send_and_free_frames(skb) ;
+  return rc ;
+}
+
+/* Test if we think a socket is affected by torus congestion. Do this by looking to see if anything is in any software transmit FIFO */
+/* Returns 1 when data for this socket may still be in flight (a software injection */
+/* FIFO is non-empty, or -- with USE_SKB_TO_SKB -- proposed frames are not all */
+/* disposed); returns 0 otherwise. */
+/* Fix: the original trace statements dereferenced 'skb' (skb->data, skb->len, */
+/* TCP_SKB_CB(skb)->flags) on the paths where skb_peek had returned NULL. */
+unsigned int bgp_torus_congestion(struct sock *sk)
+  {
+    unsigned int core ;
+    unsigned int direction ;
+    struct inet_connection_sock *icskp = inet_csk(sk) ;
+    struct inet_sock *inet = inet_sk(sk);
+    unsigned int daddr=inet->daddr ;
+    dma_tcp_t *dma_tcp=&dma_tcp_state ;
+    struct sk_buff *skb = skb_peek(&sk->sk_write_queue) ;
+
+    if( dma_tcp->i_am_compute_node
+        )
+      {
+        if( NULL == skb )
+          {
+            /* Write queue empty; do not dereference the NULL skb in the trace */
+            TRACEN(k_t_congestion,"sk=%p skb=%p ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d q-empty-retransmit",
+                sk, skb,
+                daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+                icskp->icsk_retransmits, icskp->icsk_rto
+                ) ;
+            return 0 ;
+          }
+        if( 0 == skb->len)
+          {
+            TRACEN(k_t_general,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d ack-transmit",
+                sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+                daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+                icskp->icsk_retransmits, icskp->icsk_rto
+                ) ;
+            return 0 ;
+          }
+#if defined(USE_SKB_TO_SKB)
+        {
+          unsigned int framesProposed=atomic_read(&dma_tcp->framesProposed) ;
+          unsigned int framesDisposed=dma_tcp->framesDisposed ;
+          if( framesProposed != framesDisposed)
+            {
+              TRACEN(k_t_general,
+                  "sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u propose=0x%08x disp=0x%08x\n",
+                  sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+                  daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+                  framesProposed,framesDisposed
+                  ) ;
+              return 1 ;
+
+            }
+        }
+#endif
+        for( core=0; core<k_injecting_cores; core += 1)
+          {
+            for( direction=0;direction<k_injecting_directions; direction+=1)
+              {
+                unsigned int fifo_current_head =
+                  (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ;
+                unsigned int fifo_current_tail =
+                  (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ;
+                if( fifo_current_head != fifo_current_tail)
+                  {
+                    TRACEN(k_t_general,
+                        "sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u core=%d direction=%d fifo_current_head=0x%08x fifo_current_tail=0x%08x\n",
+                        sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+                        daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+                        core,direction,
+                        fifo_current_head,fifo_current_tail
+                        ) ;
+                    return 1 ;
+                  }
+              }
+          }
+      }
+
+    if( NULL == skb )
+      {
+        /* Non-compute-node (or already-returned) path; again avoid dereferencing NULL */
+        TRACEN(k_t_congestion,"sk=%p skb=%p ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d retransmit",
+            sk, skb,
+            daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+            icskp->icsk_retransmits, icskp->icsk_rto
+            ) ;
+        return 0 ;
+      }
+    TRACEN(k_t_congestion,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d retransmit",
+        sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+        daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+        icskp->icsk_retransmits, icskp->icsk_rto
+        ) ;
+/* if( icskp->icsk_rto < 300) */
+/*   { */
+/*     icskp->icsk_rto = icskp->icsk_rto << 1 ; */
+/*     return 1 ; */
+/*   } */
+    return 0 ;
+  }
+
+/* Trace a TCP retransmission when the destination lies on the BGP fabric */
+/* (11.x.x.x or 12.x.x.x). Data-less ACK retransmits (len==0) are ignored. */
+void analyse_retransmit(struct sock *sk, struct sk_buff *skb)
+  {
+    unsigned int dest ;
+    unsigned int dest_octet0 ;
+    if( NULL == skb || 0 == skb->len ) return ; /* Need a SKB, and len==0 means an ACK with no data */
+    dest = inet_sk(sk)->daddr ;
+    dest_octet0 = dest >> 24 ;
+    if( 11 == dest_octet0 || 12 == dest_octet0 ) /* BGP fabric is 11.*.*.* and 12.*.*.* , only interested in those */
+      {
+        struct inet_connection_sock *icsk = inet_csk(sk);
+        TRACEN(k_t_congestion,"(I) sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d resending (BGP)",
+            sk, skb, skb->data, skb->len, TCP_SKB_CB(skb)->flags,
+            dest>>24, (dest>>16)&0xff,(dest>>8)&0xff,dest&0xff,icsk->icsk_retransmits, icsk->icsk_rto) ;
+      }
+  }
+
+
+/* Seem to have picked up a half-implemented feature. Dummy it. */
+/* These satisfy link-time references from the DMA SPI headers; the 'app segment' */
+/* counter feature is not used by this driver. */
+DMA_CounterAppSegment_t *DMA_CounterAppSegmentArray;
+int DMA_CounterInitAppSegments(void) { return 0 ; }
+
+void show_personality(void) ;
+void show_sprs(void) ;
+/* Issue a diagnostic op at the DMA layer */
+/* op 0: dump BIC registers; op 1: run the reception SLIH tasklet (compute nodes); */
+/* op 2: dump DMA DCRs; op 3: dump reception fifo group, timestamps and pending */
+/* slots; op 7: show reception state (USE_SKB_TO_SKB); 4-6 are currently dummied. */
+void torus_diag(int op)
+  {
+    BGP_Personality_t pers;
+    TRACES("(>)op=%d",op) ;
+
+    bluegene_getPersonality(&pers, sizeof(pers));
+    switch(op)
+    {
+    case 0:
+      show_bic_regs() ;
+      break ;
+    case 1:
+#if defined(CONFIG_BLUEGENE_TCP)
+      if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank )
+        {
+          tasklet_schedule(&dma_tcp_slih);
+        }
+#endif
+      break ;
+    case 2:
+      if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank )
+        {
+          dumpdmadcrs(k_t_request) ;
+        }
+      break ;
+    case 3:
+#if defined(CONFIG_BLUEGENE_TCP)
+      if( pers.Network_Config.Rank != pers.Network_Config.IOnodeRank )
+        {
+          dumpRecFifoGroup(dma_tcp_state.recFifoGroup) ;
+          show_timestamps() ;
+          bgp_dma_tcp_display_pending_slots(&dma_tcp_state,dma_tcp_state.node_count) ;
+        }
+#endif
+      break ;
+    case 4:
+/* show_state() ; // kernel threads and their stacks */
+      break ;
+    case 5:
+/* show_tlbs() ; // This core's current TLBs */
+/* show_sprs() ; // Core special-purpose regs relevant to debugging */
+/* show_personality() ; // Items from the 'personality' from microcode */
+      break ;
+    case 6:
+/* #if defined(USE_SKB_TO_SKB) */
+/* bgp_dma_diag_reissue_rec_counters(&dma_tcp_state) ; */
+/* #endif */
+      break ;
+    case 7:
+      #if defined(USE_SKB_TO_SKB)
+      dma_tcp_show_reception(&dma_tcp_state) ;
+      #endif
+      break ;
+    default:
+      ;
+    }
+    TRACES("(<)") ;
+  }
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp.h b/drivers/net/bgp_torus/bgp_dma_tcp.h
new file mode 100644
index 00000000000000..3b6e60ea58a7e5
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp.h
@@ -0,0 +1,1623 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *
+ *
+ ********************************************************************/
+#ifndef __BGP_DMA_TCP_H__
+#define __BGP_DMA_TCP_H__
+#include <linux/bootmem.h>
+#include <asm/div64.h>
+#include <linux/timer.h>
+#include <linux/bootmem.h>
+#include <linux/sysctl.h>
+#include <asm/atomic.h>
+
+#include "../bgp_network/bgp_net_traceflags.h"
+
+extern int bgp_dma_tcp_tracemask ;
+
+/* Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */
+/* #define COMPILED_TRACEMASK (0xffffffff-k_t_irqflow-k_t_irqflow_rcv-k_t_detail-k_t_fifocontents-k_t_toruspkt) */
+#define COMPILED_TRACEMASK (0xffffffff)
+/* #define COMPILED_TRACEMASK (k_t_error) */
+
+/* #define TORNIC_DIAGNOSE_TLB */
+#include <linux/KernelFxLog.h>
+/* 'XTRACEN' would be a dummied-out trace statement */
+#define XTRACEN(i,x...)
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+#define TRACING(i) (bgp_dma_tcp_tracemask & (COMPILED_TRACEMASK & (i)))
+#define TRACE(x...) KernelFxLog(bgp_dma_tcp_tracemask & k_t_general,x)
+#define TRACE1(x...) KernelFxLog(bgp_dma_tcp_tracemask & k_t_lowvol,x)
+#define TRACE2(x...) KernelFxLog(bgp_dma_tcp_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgp_dma_tcp_tracemask & (COMPILED_TRACEMASK & (i)),x)
+#define TRACED(x...) KernelFxLog(1,x)
+#define TRACES(x...) KernelFxLog(1,x)
+#else
+#define TRACING(x) 0
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+#if defined(CONFIG_BLUEGENE_TCP)
+#define ENABLE_FRAMES
+#endif
+
+#define AUDIT_FRAME_HEADER
+
+#define KEEP_TCP_FLAG_STATS
+
+#define BARRIER_WITH_IOCTL
+/* #define EXERCISE_WITH_IOCTL */
+
+void bgp_dma_diag_report_transmission_queue(int __user * report) ;
+
+#if defined(BARRIER_WITH_IOCTL)
+void dma_tcp_transfer_activate_sync(int sendBytes) ;
+int dma_tcp_transfer_wait_sync(int demandCount) ;
+void dma_tcp_transfer_clearcount(void) ;
+#endif
+
+#if defined(EXERCISE_WITH_IOCTL)
+void dma_tcp_transfer_activate(int sendBytes) ;
+void dma_tcp_transfer_activate_to_one(int sendBytes, unsigned int tg) ;
+void dma_tcp_transfer_activate_minicube(int sendBytes) ;
+int dma_tcp_transfer_wait(int demandCount) ;
+#endif
+
+/* Whether we want a 'watchdog' on torus arrivals */
+#define HAS_MISSED_INTERRUPT_TIMER
+
+/* Adaptive routing controls. */
+/* USE_ADAPTIVE_ROUTING builds a runtime capable of it; lower the value in /sys/module/bgp_torus/parameters/bgp_dma_adaptive_frame_limit to get frames sent that way */
+/* INITIAL_ADAPTIVE_ROUTING sets things that way at boot (and may set params up so that attempted deterministic routing isn't actually deterministic) */
+#if defined(CONFIG_BGP_TORUS_ADAPTIVE_ROUTING)
+#define USE_ADAPTIVE_ROUTING
+#define RESEQUENCE_ARRIVALS
+#define INITIAL_ADAPTIVE_ROUTING
+#endif
+
+/* Support for skbuff-to-skbuff DMA */
+#define USE_SKB_TO_SKB
+
+/* What to use the 'dest-key' in the linkhdr for. Timestamping looks good ... */
+/* #define ENABLE_LATENCY_TRACKING */
+/* #define TRACK_SEQUENCE */
+/* #define ENABLE_PROGRESS_TRACKING */
+
+#define TORUS_RECEIVE_WITH_SLIH
+
+/* #define TORUS_WITH_SIGNATURES */
+
+/* Diagnostic options */
+enum {
+  k_allow_interrupts_while_injecting = 0 , /* Select this for profiling injection */
+  k_async_free = 1 , /* Set this to allow timer-based freeing of skbuffs where the DMA has completed */
+  k_dumpmem_diagnostic = 0 ,
+  k_scattergather_diagnostic = 0 ,
+  k_verify_target = 0 , /* Whether to firewall-check that the target is reachable */
+  k_detail_stats = 0 , /* Whether to collect detailed statistics */
+  k_counter_flow_control = 1 , /* Whether to flow-control by limiting the number of reception counters allocated to a single source */
+  k_force_eager_flow = 0 , /* Whether to start up with everything running 'eager' protocol (no 'rendezvous') */
+  k_abbreviate_headlen = 1 , /* Whether to abbreviate the DMA transfer of 'head' in respect of the FIFO transfer */
+  /* TODO: after testing that it works (on busy machines) , we should always take the 'deferral' path */
+  k_allow_defer_skb_for_counter = 1, /* Whether to allow deferral allocating a 'full-size' skb until a reception counter is available */
+  k_verify_ctlen = 1 , /* Whether to check that the length in the IP header matches the skbuff structure */
+  k_configurable_virtual_channel = 1 /* Whether to allow runtime configuration of the virtual channel to use */
+};
+
+
+
+enum {
+ numInjCounters = 1 ,
+ recFifoId = 0 ,
+ k_InjectionFifoGroupFrames = 0 ,
+ k_InjectionFifoGroupMemcpy = 1 ,
+ k_ReceptionFifoGroup = 0 ,
+ k_InjectionCounterGroup = 0 ,
+ k_ReceptionCounterGroup = 0 ,
+ k_ReceptionCounterGroupMemcpy = 1
+
+};
+
+/* We handle fragmented skbuffs if they are presented. The receive side doesn't need to know; */
+/* the send side injects additional 'direct put' descriptors as needed. */
+/* The bytes on the wire might be slightly different split between cells, but on the receive side this */
+/* is all handled by hardware. */
+enum {
+ k_support_scattergather = 1 /* Whether we support a 'scattergather' skbuff */
+};
+
+/* At one time, we ran per-core injection, to try to minimise the locking requirement. This is now changed to */
+/* per-destination injection, to try to minimise out-of-order delivery. */
+enum {
+ k_injecting_cores = 4 ,
+ k_skb_controlling_directions = 7 , /* 'directions' where we want to free skbuffs when sent */
+#if defined(USE_SKB_TO_SKB)
+ k_injecting_directions = 8 , /* 6 real directions, a 'taxi' for single packet messages, and a 'propose/accept stream' */
+#else
+ k_injecting_directions = 7 , /* 6 real directions, a 'taxi' for single packet messages */
+#endif
+};
+
+/* Following section for 'packets' style */
+enum {
+ k_torus_skb_alignment = 16 ,
+ k_torus_link_payload_size = 240
+};
+
+enum {
+ k_idma_descriptor_size = 32 ,
+ k_injection_packet_size = 240
+} ;
+
+enum {
+/* k_concurrent_receives = 32 */ /* Number of frames-in-flight we can handle from a source (in respect of adaptive routing) */
+ k_concurrent_receives = 128 /* Number of frames-in-flight we can handle from a source (in respect of adaptive routing) */
+};
+
+/* Allocate a buffer that lives for the lifetime of the driver (never freed). */
+/* NOTE(review): kmalloc may return NULL; callers BUG_ON the result rather than recover. */
+static inline void * local_permanent_alloc(unsigned int size)
+  {
+    void *result = kmalloc(size, GFP_KERNEL) ;
+    TRACEN(k_t_general,"size=0x%08x result=%p",size,result) ;
+    return result ;
+  }
+
+/* Using these when we are statically allocating buffers, or using alloc_bootmem_low */
+enum {
+ k_idma_descriptor_count = 16384, /* Design choice */
+ k_injection_packet_count = 16384 /* Matches IDMA descriptor count, to keep tagging simple */
+ /* k_injection_packet_count = (1<<22)/k_injection_packet_size // 4 megabytes of 'runway' */
+};
+
+enum {
+ k_memcpy_idma_descriptor_count = 64, /* Design choice */
+};
+
+typedef struct {
+ char buffer[k_idma_descriptor_size*k_memcpy_idma_descriptor_count] ;
+} memcpy_packet_injection_memoryfifo_t __attribute__((aligned(16)));
+
+typedef struct {
+ char buffer[k_idma_descriptor_size*k_idma_descriptor_count] ;
+} packet_injection_memoryfifo_t __attribute__((aligned(16)));
+
+typedef struct {
+ int tailx[k_injection_packet_count] ;
+} packet_injection_tag_t ;
+
+typedef struct {
+ struct sk_buff * skb_array[k_injection_packet_count] ;
+} packet_skb_array_t ;
+
+/* Allocate the per-(core,direction) injection descriptor FIFO. */
+/* NOTE(review): the BUG_ON asserts 32-byte alignment but nothing here requests it; */
+/* it presumably relies on kmalloc's alignment behaviour -- confirm. */
+static inline packet_injection_memoryfifo_t * allocate_packet_injection_memoryfifo(unsigned int core, unsigned int direction)
+  {
+    packet_injection_memoryfifo_t * rc = local_permanent_alloc(sizeof(packet_injection_memoryfifo_t)) ;
+    BUG_ON(rc == NULL) ;
+    XTRACEN(k_t_init,"allocate_packet_injection_memoryfifo core=%d direction=%d rc=%p",
+        core, direction, rc ) ;
+    BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ; /* Need 32-byte alignment */
+    return rc ;
+  }
+
+/* Allocate the per-core injection descriptor FIFO used by the DMA memcpy engine. */
+/* NOTE(review): as above, 32-byte alignment is asserted, not requested -- confirm kmalloc guarantees it. */
+static inline memcpy_packet_injection_memoryfifo_t * allocate_memcpy_packet_injection_memoryfifo(unsigned int core)
+  {
+    memcpy_packet_injection_memoryfifo_t * rc = local_permanent_alloc(sizeof(memcpy_packet_injection_memoryfifo_t)) ;
+    BUG_ON(rc == NULL) ;
+    TRACEN(k_t_general,"allocate_memcpy_packet_injection_memoryfifo core=%d rc=%p",
+        core, rc ) ;
+    BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ; /* Need 32-byte alignment */
+    return rc ;
+  }
+
+/* Allocate the per-(core,direction) tag array tracking injected packet tails. */
+/* NOTE(review): the memory is not zeroed here; callers presumably initialise */
+/* entries before use -- confirm. */
+static inline packet_injection_tag_t * allocate_packet_injection_tag(unsigned int core, unsigned int direction)
+  {
+    packet_injection_tag_t * rc = kmalloc(sizeof(packet_injection_tag_t),GFP_KERNEL) ;
+    BUG_ON(rc == NULL) ;
+    XTRACEN(k_t_init,"allocate_packet_injection_tag core=%d direction=%d rc=%p",
+        core, direction, rc ) ;
+    BUG_ON( ( ((unsigned int) rc) & 0x1f) != 0 ) ; /* Need 32-byte alignment */
+    return rc ;
+  }
+
+/* Allocate and zero the per-(core,direction) array of in-flight skbuff pointers. */
+static inline packet_skb_array_t * allocate_packet_skb_array(unsigned int core, unsigned int direction)
+  {
+    packet_skb_array_t * rc = kmalloc(sizeof(packet_skb_array_t),GFP_KERNEL) ;
+    BUG_ON(rc == NULL) ;
+    XTRACEN(k_t_init,"allocate_skb_array core=%d direction=%d rc=%p",
+        core, direction, rc ) ;
+    memset(rc,0,sizeof(packet_skb_array_t)) ;
+    return rc ;
+  }
+
+enum {
+ k_idma_frame_count = 16384 /* Design choice */
+};
+
+typedef struct {
+#if defined(ENABLE_PACKETS) || defined(ENABLE_FRAMES)
+ packet_injection_memoryfifo_t * idma_fifo ;
+ packet_injection_tag_t * idma_tag ;
+ packet_skb_array_t * idma_skb_array ;
+ unsigned int fifo_head_index ;
+ unsigned int fifo_tail_index ;
+ unsigned int buffer_head_index ;
+ unsigned int buffer_tail_index ;
+ unsigned int fifo_initial_head ;
+ unsigned int packets_injected_count ;
+ unsigned int injection_vacant ;
+ unsigned int injection_high_watermark ;
+#endif
+#if defined(ENABLE_FRAMES)
+ struct sk_buff_head frame_queue ;
+#endif
+} idma_direction_t ;
+
+/* Allocate and reset the injection-DMA state for one (core,direction) pair: the */
+/* descriptor fifo, tag array, skb array, all head/tail indices, and (under */
+/* ENABLE_FRAMES) the pending-frame queue. */
+/* NOTE(review): fifo_initial_head is not initialised here -- presumably set when the */
+/* hardware fifo is configured; confirm. */
+static inline void allocate_idma_direction(idma_direction_t * idma_direction,unsigned int core, unsigned int direction)
+  {
+#if defined(ENABLE_PACKETS) || defined(ENABLE_FRAMES)
+    idma_direction->idma_fifo = allocate_packet_injection_memoryfifo(core, direction) ;
+    idma_direction->idma_tag = allocate_packet_injection_tag(core,direction) ;
+    idma_direction->idma_skb_array = allocate_packet_skb_array(core,direction) ;
+    idma_direction->fifo_head_index = 0 ;
+    idma_direction->fifo_tail_index = 0 ;
+    idma_direction->buffer_head_index = 0 ;
+    idma_direction->buffer_tail_index = 0 ;
+    idma_direction->injection_vacant = 0 ;
+    idma_direction->injection_high_watermark = 0 ;
+    idma_direction->packets_injected_count = 0 ;
+#endif
+#if defined(ENABLE_FRAMES)
+    skb_queue_head_init(&idma_direction->frame_queue) ;
+#endif
+  }
+
+/* Per-core injection-DMA state: one idma_direction_t per torus direction,
+ * plus the memcpy-offload FIFO and its head/tail indices. */
+typedef struct {
+ idma_direction_t idma_direction[k_injecting_directions] ;
+ memcpy_packet_injection_memoryfifo_t *memcpy_packet_fifo ;
+ unsigned int memcpy_packet_fifo_head_index ;
+ unsigned int memcpy_packet_fifo_tail_index ;
+ unsigned int memcpy_fifo_initial_head ;
+} idma_core_t ;
+
+/* Allocate every direction's state for one core, then the core's memcpy FIFO.
+ * NOTE(review): the memcpy FIFO head/tail indices are not zeroed here --
+ * presumably initialised elsewhere; confirm against callers. */
+static inline void allocate_idma_core(idma_core_t * idma_core,unsigned int core)
+ {
+ int direction ;
+ for( direction=0 ; direction<k_injecting_directions;direction+=1 )
+ {
+ allocate_idma_direction(idma_core->idma_direction+direction, core, direction) ;
+ }
+ idma_core->memcpy_packet_fifo=allocate_memcpy_packet_injection_memoryfifo(core) ;
+ }
+
+/* Whole-node injection-DMA state: one idma_core_t per injecting core. */
+typedef struct {
+ idma_core_t idma_core[k_injecting_cores] ;
+} idma_t ;
+
+/* Allocate injection-DMA state for every injecting core on this node. */
+static inline void allocate_idma(idma_t * idma)
+ {
+ int core ;
+ for( core=0 ; core<k_injecting_cores;core+=1 )
+ {
+ allocate_idma_core(idma->idma_core+core, core) ;
+ }
+ }
+
+/* 'per-slot' structures for demultiplexing received torus messages. */
+/* we are no longer running 1 slot per possibly-sending core, i.e. 4 per node in the partition; now running 1 per node */
+/* Get/set methods because for 'large' machines we might need bigger tables than can be kmalloced in one go */
+#if defined(ENABLE_LATENCY_TRACKING)
+
+/* Running-sums accumulator for latency samples: count (s1), sum (sx),
+ * sum of squares (sxx), and observed min/max. */
+typedef struct {
+ unsigned long long s1 ;
+ unsigned long long sx ;
+ unsigned long long sxx ;
+ unsigned int xmin ;
+ unsigned int xmax ;
+} rcv_statistic_t ;
+
+/* Reset the accumulator; xmin starts at UINT_MAX so any sample lowers it. */
+static void rcv_statistic_clear(rcv_statistic_t *t)
+ {
+ t->s1 = 0;
+ t->sx = 0;
+ t->sxx = 0 ;
+ t->xmin = 0xffffffff ;
+ t->xmax = 0 ;
+ }
+/* Fold one sample 'x' into the running sums and min/max. The square is
+ * computed in 64 bits to avoid 32-bit overflow. */
+static void rcv_statistic_observe(rcv_statistic_t *t, unsigned int x)
+ {
+ unsigned long long ullx = x ;
+ unsigned long long ullxx = ullx*ullx ;
+ t->s1 += 1 ;
+ t->sx += x ;
+ t->sxx += ullxx ;
+ if( x<t->xmin ) t->xmin=x ;
+ if( x>t->xmax ) t->xmax=x ;
+ }
+/* Mean of the observed samples, sx/s1, using the kernel's do_div for
+ * 64/32-bit division.
+ * NOTE(review): s1 == 0 would divide by zero -- callers are expected to
+ * have at least one observation; confirm. */
+static unsigned int rcv_statistic_mean(rcv_statistic_t *t)
+ {
+ unsigned long long s1=t->s1 ;
+ unsigned long long sx=t->sx ;
+ unsigned long long rc = sx ;
+ do_div(rc,(unsigned int)s1) ;
+ TRACEN(k_t_detail,"sx=0x%08x%08x s1=0x%08x%08x mean=%u",
+ (unsigned int)(sx>>32),(unsigned int)sx,
+ (unsigned int)(s1>>32),(unsigned int)s1,(unsigned int)rc) ;
+ return (unsigned int)rc ;
+ }
+/* Population variance of the observed samples, given their mean 'm':
+ * Var = (Sum(x^2) - n*m^2) / n.
+ * Fix: the previous code computed (Sum(x^2) - m^2) / n, omitting the n
+ * factor on the m^2 term, which grossly over-reports the variance.
+ * Also returns 0 when no samples have been observed, avoiding a do_div
+ * by zero. */
+static unsigned int rcv_statistic_variance(rcv_statistic_t *t, unsigned int m)
+ {
+ unsigned long long s1=t->s1 ;
+ unsigned long long sx=t->sx ;
+ unsigned long long sxx=t->sxx ;
+ unsigned long long mm=m ;
+ unsigned long long vv ;
+ unsigned long long rc ;
+ if( 0 == s1 ) return 0 ; /* no samples yet */
+ vv = sxx - s1*mm*mm ;
+ rc = vv ;
+ do_div(rc,(unsigned int)s1) ;
+ TRACEN(k_t_detail,"sxx=0x%08x%08x sx=0x%08x%08x s1=0x%08x%08x mm=0x%08x%08x vv=0x%08x%08x variance=%u",
+ (unsigned int)(sxx>>32),(unsigned int)sxx,
+ (unsigned int)(sx>>32),(unsigned int)sx,
+ (unsigned int)(s1>>32),(unsigned int)s1,
+ (unsigned int)(mm>>32),(unsigned int)mm,
+ (unsigned int)(vv>>32),(unsigned int)vv,
+ (unsigned int)rc) ;
+ return (unsigned int)rc ;
+ }
+#endif
+/* TODO: Can this be condensed ? Should be a 'char * payload' and a 'char * payload_alert', down to 8 bytes */
+/* or could even be a 28-bit address (since we know 16-byte alignment) and a 4-bit count so we treat things */
+/* in more detail every 16 packets or when the frame is done if sooner */
+/* TODO: also: maybe the injector should flag the last packet of a frame with a different function ? */
+/* Per-slot reception-demultiplex state; one slot per node in the partition. */
+typedef struct {
+ unsigned char * payload ; /* next reception address for this slot */
+ unsigned char * payload_alert ;
+ unsigned int expect ;
+ int lastcell ;
+ unsigned int proposals_active ;
+ struct sk_buff_head proposals_pending_flow ;
+#if defined(USE_ADAPTIVE_ROUTING)
+ struct sk_buff * skb_per_conn[k_concurrent_receives] ;
+#if defined(RESEQUENCE_ARRIVALS)
+ struct sk_buff * skb_pending_resequence[k_concurrent_receives] ;
+ unsigned int conn_id_pending_delivery ;
+#endif
+#endif
+#if defined(ENABLE_LATENCY_TRACKING)
+ rcv_statistic_t latency ;
+ unsigned int basetime ;
+#endif
+#if defined(ENABLE_PROGRESS_TRACKING)
+ unsigned long long timestamp ;
+#endif
+} rcv_per_slot_t ;
+
+/* Mapping of a partner node's IP address to its torus coordinates. */
+typedef struct {
+ unsigned int partner_ip_address ;
+ unsigned int partner_xyz ;
+} learned_address_entry ;
+
+/* Reception demultiplexer: parallel per-slot state and skb vectors. */
+typedef struct {
+ rcv_per_slot_t * rcv_per_slot_vector ;
+ struct sk_buff ** skb_per_slot_vector ;
+} rcv_t ;
+
+/* Trivial get/set accessors for per-slot reception state. Kept behind
+ * functions (rather than direct member access) so that 'large' machines can
+ * later swap in segmented tables bigger than a single kmalloc allows.
+ * NOTE(review): the payload accessors use char* while the underlying fields
+ * are unsigned char* -- this relies on an implicit pointer conversion
+ * (compiler warning); confirm whether the mismatch is intentional. */
+static inline char * get_rcv_payload(rcv_t *rcv, unsigned int slot_index)
+ {
+ return rcv->rcv_per_slot_vector[slot_index].payload ;
+ }
+
+static inline void set_rcv_payload(rcv_t *rcv, unsigned int slot_index, char * payload )
+ {
+ rcv->rcv_per_slot_vector[slot_index].payload = payload ;
+ }
+
+static inline unsigned int get_proposals_active(rcv_t *rcv, unsigned int slot_index)
+ {
+ return rcv->rcv_per_slot_vector[slot_index].proposals_active ;
+ }
+
+static inline void set_proposals_active(rcv_t *rcv, unsigned int slot_index, unsigned int proposals_active )
+ {
+ rcv->rcv_per_slot_vector[slot_index].proposals_active = proposals_active ;
+ }
+
+static inline char * get_rcv_payload_alert(rcv_t *rcv, unsigned int slot_index)
+ {
+ return rcv->rcv_per_slot_vector[slot_index].payload_alert ;
+ }
+
+static inline void set_rcv_payload_alert(rcv_t *rcv, unsigned int slot_index, char * payload_alert )
+ {
+ rcv->rcv_per_slot_vector[slot_index].payload_alert = payload_alert ;
+ }
+
+static inline unsigned int get_rcv_expect(rcv_t *rcv, unsigned int slot_index)
+ {
+ return rcv->rcv_per_slot_vector[slot_index].expect ;
+ }
+
+static inline void set_rcv_expect(rcv_t *rcv, unsigned int slot_index, unsigned int expect)
+ {
+ rcv->rcv_per_slot_vector[slot_index].expect = expect ;
+ }
+
+static inline int get_rcv_lastcell(rcv_t *rcv, unsigned int slot_index)
+ {
+ return rcv->rcv_per_slot_vector[slot_index].lastcell ;
+ }
+
+static inline void set_rcv_lastcell(rcv_t *rcv, unsigned int slot_index, int lastcell)
+ {
+ rcv->rcv_per_slot_vector[slot_index].lastcell = lastcell ;
+ }
+
+static inline struct sk_buff * get_rcv_skb(rcv_t *rcv, unsigned int slot_index)
+ {
+ return rcv->skb_per_slot_vector[slot_index] ;
+ }
+
+static inline void set_rcv_skb(rcv_t *rcv, unsigned int slot_index, struct sk_buff * skb)
+ {
+ rcv->skb_per_slot_vector[slot_index] = skb ;
+ }
+
+/* Wrappers over the kernel skb queue for a slot's pending-proposal list.
+ * skb_queue_* take the queue's internal lock, so these are safe from
+ * multiple contexts. */
+static inline void init_pending_flow(rcv_t *rcv, unsigned int slot_index)
+{
+ skb_queue_head_init(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ;
+}
+
+static inline void enq_pending_flow(rcv_t *rcv, unsigned int slot_index, struct sk_buff * skb)
+{
+ skb_queue_tail(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow,skb) ;
+}
+
+static inline struct sk_buff * deq_pending_flow(rcv_t *rcv, unsigned int slot_index)
+{
+ return skb_dequeue(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ;
+}
+
+static inline unsigned int count_pending_flow(rcv_t *rcv, unsigned int slot_index)
+{
+ return skb_queue_len(&rcv->rcv_per_slot_vector[slot_index].proposals_pending_flow) ;
+}
+
+#if defined(USE_ADAPTIVE_ROUTING)
+/* Per-connection skb accessors for adaptive routing. conn_id is reduced
+ * modulo k_concurrent_receives (a power of two) by bit-masking. */
+static inline struct sk_buff * get_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id)
+{
+ return rcv->rcv_per_slot_vector[slot_index].skb_per_conn[conn_id & (k_concurrent_receives-1)] ;
+}
+
+static void set_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb) __attribute__((unused)) ;
+static void set_rcv_skb_for_conn(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb)
+{
+ rcv->rcv_per_slot_vector[slot_index].skb_per_conn[conn_id & (k_concurrent_receives-1)] = skb ;
+}
+#if defined(RESEQUENCE_ARRIVALS)
+ /* Accessors for the resequencing buffer: skbs held until their conn_id
+ * comes up for in-order delivery. */
+ static inline struct sk_buff * get_rcv_skb_pending_resequence(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id)
+ {
+ return rcv->rcv_per_slot_vector[slot_index].skb_pending_resequence[conn_id & (k_concurrent_receives-1)] ;
+ }
+ static inline void set_rcv_skb_pending_resequence(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb)
+ {
+ rcv->rcv_per_slot_vector[slot_index].skb_pending_resequence[conn_id & (k_concurrent_receives-1)] = skb;
+ }
+ static inline int get_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index)
+ {
+ return rcv->rcv_per_slot_vector[slot_index].conn_id_pending_delivery ;
+ }
+ static void set_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id) __attribute__((unused)) ;
+ static void set_rcv_conn_pending_delivery(rcv_t *rcv, unsigned int slot_index, unsigned int conn_id)
+ {
+ rcv->rcv_per_slot_vector[slot_index].conn_id_pending_delivery=conn_id ;
+ }
+
+#endif
+
+#endif
+
+/* Progress-tracking timestamp accessors. Compile to a constant 0 / no-op
+ * when ENABLE_PROGRESS_TRACKING is off, so callers need no #ifdefs. */
+static inline unsigned long long get_timestamp(rcv_t *rcv, unsigned int slot_index)
+ {
+#if defined(ENABLE_PROGRESS_TRACKING)
+ return rcv->rcv_per_slot_vector[slot_index].timestamp ;
+#else
+ return 0 ;
+#endif
+ }
+
+static inline void set_timestamp(rcv_t *rcv, unsigned int slot_index, unsigned long long timestamp)
+ {
+#if defined(ENABLE_PROGRESS_TRACKING)
+ rcv->rcv_per_slot_vector[slot_index].timestamp=timestamp ;
+#endif
+ }
+
+enum {
+ k_slots_per_node = 1 , /* down from 4 ... */
+ k_connids_per_node = 128 /* Number of conn-ids we track per node on the sending side */
+};
+/* Allocate the reception demultiplexer for 'node_count' nodes
+ * (k_slots_per_node slots each). Fatal (BUG_ON) on allocation failure.
+ * Fixes: the skb_per_slot_vector was previously left uninitialised (only
+ * checked twice for NULL -- duplicated BUG_ON); it is now zeroed so every
+ * slot starts with no skb. The pending-flow init loop now covers all
+ * k_slots_per_node*node_count allocated slots, not just node_count of them
+ * (equivalent while k_slots_per_node == 1, correct if that ever grows). */
+static inline void allocate_rcv(rcv_t *rcv, unsigned int node_count)
+ {
+ unsigned int slot_count = k_slots_per_node*node_count ;
+ rcv->rcv_per_slot_vector = kmalloc(slot_count*sizeof(rcv_per_slot_t), GFP_KERNEL) ;
+ BUG_ON(NULL == rcv->rcv_per_slot_vector) ;
+ memset(rcv->rcv_per_slot_vector,0,slot_count*sizeof(rcv_per_slot_t)) ;
+ rcv->skb_per_slot_vector = kmalloc(slot_count*sizeof(struct sk_buff *), GFP_KERNEL) ;
+ BUG_ON(NULL == rcv->skb_per_slot_vector) ;
+ memset(rcv->skb_per_slot_vector,0,slot_count*sizeof(struct sk_buff *)) ;
+ {
+ unsigned int slot ;
+ for(slot=0;slot<slot_count;slot+=1)
+ {
+ init_pending_flow(rcv,slot) ;
+ }
+ }
+ }
+
+#if defined(USE_ADAPTIVE_ROUTING)
+
+extern ulong bgp_dma_adaptive_frame_limit ;
+
+/* Transmission multiplexer: a per-slot atomic connection-id counter, and
+ * (with skb-to-skb DMA) a per-conn-id table of in-flight skbs. */
+typedef struct {
+ atomic_t * conn_id ;
+#if defined(USE_SKB_TO_SKB)
+ struct sk_buff **skb ;
+#endif
+} tx_t ;
+
+/* Start a slot's conn_id at 0xffffffff so the first atomic_inc_return
+ * yields 0. */
+static inline void init_tx_conn_id(tx_t *tx, unsigned int slot_index)
+{
+ atomic_set(tx->conn_id+slot_index,0xffffffff) ;
+}
+
+/* Allocate the transmission-multiplexer state for 'node_count' nodes.
+ * Fatal (BUG_ON) on allocation failure, like the other boot-time allocators.
+ * Fixes: the BUG_ON/memset on tx->skb previously sat OUTSIDE the
+ * USE_SKB_TO_SKB guard, which cannot compile when that feature is off
+ * (tx_t has no 'skb' member then); and only node_count of the
+ * k_slots_per_node*node_count conn_id entries were initialised (equivalent
+ * while k_slots_per_node == 1, correct if that ever grows). */
+static inline void allocate_tx(tx_t *tx, unsigned int node_count)
+ {
+ unsigned int slot_count = k_slots_per_node*node_count ;
+ tx->conn_id = kmalloc(slot_count*sizeof(atomic_t), GFP_KERNEL) ;
+ BUG_ON(NULL == tx->conn_id) ;
+ {
+ unsigned int x ;
+ for(x=0;x<slot_count;x+=1)
+ {
+ init_tx_conn_id(tx,x) ;
+ }
+ }
+#if defined(USE_SKB_TO_SKB)
+ tx->skb = kmalloc(k_connids_per_node*node_count*sizeof(struct sk_buff *),GFP_KERNEL) ;
+ BUG_ON(NULL == tx->skb) ;
+ memset(tx->skb,0,k_connids_per_node*node_count*sizeof(struct sk_buff *)) ;
+#endif
+ }
+
+/* Atomically claim the next connection id for a slot. Wraps naturally;
+ * users mask with (k_connids_per_node-1) when indexing the skb table. */
+static inline unsigned int take_tx_conn_id(tx_t *tx, unsigned int slot_index)
+{
+ unsigned int rc= atomic_inc_return(tx->conn_id+slot_index) ;
+ TRACEN(k_t_general,"slot_index=0x%08x conn_id=0x%08x",slot_index,rc) ;
+ return rc ;
+}
+#if defined(USE_SKB_TO_SKB)
+/* In-flight skb table, indexed by (slot, conn_id mod k_connids_per_node). */
+static inline struct sk_buff * get_tx_skb(tx_t *tx, unsigned int slot_index, unsigned int conn_id)
+{
+ return tx->skb[slot_index*k_connids_per_node+(conn_id & (k_connids_per_node-1))] ;
+}
+static inline void set_tx_skb(tx_t *tx, unsigned int slot_index, unsigned int conn_id, struct sk_buff * skb)
+{
+ tx->skb[slot_index*k_connids_per_node+(conn_id & (k_connids_per_node-1))] = skb ;
+}
+
+#endif
+
+#endif
+
+/* End of 'packets' style section */
+/* Reception memory-FIFO size: configurable via Kconfig shift, else 4MB. */
+enum {
+ k_desired_reception_memory_fifo_size =
+#if defined(CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT)
+ 1 << (CONFIG_BGP_RECEPTION_MEMORY_FIFO_SHIFT)
+#else
+ 1 << 22 /* Try 4MB as a static region, if not set externally */
+/* 1 << 20 // Try 1MB as a static region, if not set externally */
+#endif
+} ;
+enum {
+ k_metadata_injection_memory_fifo_size = 4096 ,
+ k_bulk_injection_memory_fifo_size = 4096
+};
+
+/* Fixed-size buffer wrappers so the FIFO regions can be allocated by type. */
+typedef struct {
+ char buffer[k_metadata_injection_memory_fifo_size] ;
+} metadata_injection_memoryfifo_t ;
+
+typedef struct {
+ char buffer[k_bulk_injection_memory_fifo_size] ;
+} bulk_injection_memoryfifo_t ;
+
+
+#if defined(BARRIER_WITH_IOCTL)
+enum {
+ k_diag_target_data_size = 1<<20 , /* Aim up to 1MB ... */
+ k_diag_packet_count = k_diag_target_data_size/k_injection_packet_size , /* Rounding down for packets ... */
+};
+typedef struct {
+ char buffer[k_diag_target_data_size] ;
+} diag_block_buffer_t ;
+
+/* Allocate the 1MB diagnostics buffer; fatal (BUG_ON) on failure. */
+static inline diag_block_buffer_t * allocate_diag_block_buffer(void)
+{
+ diag_block_buffer_t * result = kmalloc(k_diag_target_data_size,GFP_KERNEL) ;
+ BUG_ON(NULL == result) ;
+ return result ;
+}
+
+/* Allocate an xe*ye*ze-entry node-shuffle vector for diagnostics; fatal on
+ * failure. */
+static inline unsigned int * allocate_shuffle_vector(unsigned int xe, unsigned int ye, unsigned int ze)
+{
+ unsigned int * result = kmalloc(xe*ye*ze*sizeof(unsigned int),GFP_KERNEL) ;
+ BUG_ON(NULL == result) ;
+ return result ;
+}
+#endif
+
+
+enum {
+ k_Dimensionality = 3
+};
+
+/* {x,y,z} coordinates of a node in the torus. */
+typedef struct {
+ unsigned char coordinate[k_Dimensionality] ;
+} torusLocation_t ;
+
+/* Instrumentation points counted in dma_tcp_t.flow_counter[]; the final
+ * member k_flow_counters is the array size. */
+typedef enum {
+ k_send_propose_rpc ,
+ k_act_propose_rpc ,
+ k_send_accept_rpc ,
+ k_act_accept_rpc ,
+
+ k_defer_accept_rpc_counters ,
+ k_defer_accept_rpc_nodeflow ,
+ k_send_eager ,
+ k_receive_eager ,
+
+ k_no_reception_counter ,
+ k_parked ,
+ k_scattergather ,
+ k_receive_incomplete ,
+
+ k_headlength_zero ,
+ k_fraglength_zero ,
+ k_accept_audit_fail ,
+ k_receive_audit_fail ,
+
+ k_counted_length_mismatch ,
+ k_reordered ,
+ k_queue_filled_propose_fifo ,
+
+
+ k_flow_counters
+} flowpoint_e ;
+
+#if defined(CONFIG_BGP_STATISTICS)
+extern int reception_fifo_histogram[33] ;
+extern int reception_hi_watermark ;
+extern int rtt_histogram[33] ;
+extern int transit_histogram[33] ;
+#endif
+
+enum {
+ k_pending_rcv_skb_classes = 6
+};
+/* Per-direction balancer state: skbs waiting for a reception counter, and
+ * how many counters are outstanding in that direction. */
+typedef struct {
+ struct sk_buff_head pending_rcv_skbs ; /* List of sk_buffs awaiting a reception counter */
+ unsigned int outstanding_counters ; /* Number of counters awaiting completion in this direction */
+} bgp_dma_balancer_direction ;
+typedef struct {
+ bgp_dma_balancer_direction b[k_pending_rcv_skb_classes] ;
+} bgp_dma_balancer ;
+/* Master state block for the torus DMA TCP driver: topology, FIFO groups,
+ * counter groups, per-direction queues, timers, sysctl tuning knobs and
+ * statistics. A single instance (dma_tcp_state) exists per node. */
+typedef struct {
+ torusLocation_t location ;
+ torusLocation_t extent ;
+ /* Number of bits required to represent a node in each torus dimension */
+ unsigned int xbits ;
+ unsigned int ybits ;
+ unsigned int zbits ;
+
+ DMA_RecFifoGroup_t * recFifoGroup;
+ rcv_t rcvdemux ; /* Reception demultiplex */
+#if defined(USE_ADAPTIVE_ROUTING)
+ tx_t tx_mux ; /* Transmission multiplexer (conn_ids by slot) */
+#endif
+ unsigned int node_count ; /* Total number of nodes in the block */
+ unsigned int node_slot_mask ; /* ((node_count << 2)-1) , for bit-masking to firewall check received data */
+#ifdef ENABLE_PACKETS
+ DMA_InjFifoGroup_t injFifoGroupPackets;
+ int injFifoPacketsIds[ k_injecting_cores*k_injecting_directions ];
+ int proto_issue_packets ;
+
+ /* End of packets-style interface */
+#endif
+ idma_t idma ; /* Injection DMA buffering */
+#ifdef ENABLE_PACKETS
+ unsigned short int injFifoPacketsPri[ k_injecting_cores*k_injecting_directions ] ;
+ unsigned short int injFifoPacketsLoc[ k_injecting_cores*k_injecting_directions ] ;
+ unsigned char injFifoPacketsMap[ k_injecting_cores*k_injecting_directions ] ;
+#endif
+ struct sk_buff_head inj_queue[k_injecting_directions] ; /* Lists of skb's queued because DMA buffers have no space */
+ unsigned int packets_received_count ;
+ struct timer_list runway_check_timer ;
+ struct timer_list transmission_free_skb_timer ;
+#if defined(HAS_MISSED_INTERRUPT_TIMER)
+ struct timer_list torus_missed_interrupt_timer ;
+#endif
+#ifdef ENABLE_FRAMES
+ DMA_InjFifoGroup_t injFifoGroupFrames;
+ int injFifoFramesIds[ k_injecting_cores*k_injecting_directions ];
+ int proto_issue_frames_single ;
+#if defined(USE_ADAPTIVE_ROUTING)
+ int proto_issue_frames_adaptive ;
+#endif
+#if defined(USE_SKB_TO_SKB)
+ int proto_transfer_propose ;
+ int eager_limit ; /* frames larger than this to be sent with skb-to-skb DMA */
+ int flow_counter[k_flow_counters] ;
+#endif
+#if defined(BARRIER_WITH_IOCTL)
+ int proto_issue_diag_sync ;
+ diag_block_buffer_t * diag_block_buffer ;
+ unsigned int * shuffle_vector ;
+ unsigned int shuffle_seed ;
+ int prev_tbl ;
+ unsigned int timing_histogram_buckets[33] ;
+#endif
+ unsigned short int injFifoFramesPri[ k_injecting_cores*k_injecting_directions ] ;
+ unsigned short int injFifoFramesLoc[ k_injecting_cores*k_injecting_directions ] ;
+ unsigned char injFifoFramesMap[ k_injecting_cores*k_injecting_directions ] ;
+#endif
+
+ DMA_CounterGroup_t injCounterGroup;
+ DMA_CounterGroup_t recCounterGroup;
+
+ void * receptionfifo ;
+ unsigned int receptionfifoSize ;
+
+ unsigned int mtu ;
+ unsigned int max_packets_per_frame ;
+
+ DMA_RecFifoMap_t recMap; /* rec fifo map structure */
+
+
+
+#if defined(USE_SKB_TO_SKB)
+ int injCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ;
+ int recCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ;
+ char recCntrInUse [ DMA_NUM_COUNTERS_PER_GROUP ] ;
+ int qtyFreeRecCounters ;
+ int scanRecCounter ;
+ struct sk_buff * inj_skbs[DMA_NUM_COUNTERS_PER_GROUP] ;
+ struct sk_buff * rcv_skbs[DMA_NUM_COUNTERS_PER_GROUP] ;
+ unsigned int slot_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ;
+ unsigned char conn_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ;
+ int rcv_timestamp[DMA_NUM_COUNTERS_PER_GROUP] ;
+ int rcv_checked_time ;
+ bgp_dma_balancer balancer ;
+ atomic_t framesProposed ;
+ unsigned int framesDisposed ;
+#endif
+ unsigned short int memcpyInjFifoFramesPri[ k_injecting_cores ] ;
+ unsigned short int memcpyInjFifoFramesLoc[ k_injecting_cores ] ;
+ unsigned char memcpyInjFifoFramesMap[ k_injecting_cores ] ;
+ DMA_InjFifoGroup_t memcpyInjFifoGroupFrames;
+ int memcpyInjFifoFramesIds[ k_injecting_cores ];
+ DMA_CounterGroup_t memcpyRecCounterGroup;
+ int memcpyRecCntrSubgrps[ DMA_NUM_COUNTER_SUBGROUPS_PER_GROUP ] ;
+
+ int proto_diagnose ; /* 'diagnose' frame to software reception FIFO */
+
+ unsigned int SW_Arg ; /* 'Software Arg', we send our {x,y,z} */
+ unsigned int src_key ; /* 'source key', we send rank */
+
+
+ spinlock_t dirInjectionLock[k_injecting_cores*k_injecting_directions] ; /* serialise access to injection FIFOs */
+
+ void * previousActor ; /* FIFO address of previous Actor, for detecting replays */
+
+
+ /* sysctl entries */
+ struct ctl_table_header * sysctl_table_header ;
+/* Statistics */
+
+ struct net_device_stats * device_stats ;
+ unsigned int count_no_skbuff ;
+ unsigned int tx_by_core[4] ;
+ unsigned int tx_in_use_count[k_injecting_directions+1] ;
+#if defined(KEEP_TCP_FLAG_STATS)
+ unsigned int tcp_received_flag_count[8] ;
+#endif
+/* Tuning parameters */
+ int tuning_num_packets ; /* = 1 , number of packets to process per poll call */
+ int tuning_num_empty_passes ; /* = 512 , number of times to spin before returning */
+ int tuning_non_empty_poll_delay ; /* = 850 , number of cycles to spin between looks at the FIFO */
+ int tuning_poll_after_enabling ; /* = 1 , whether to poll again after enabling for interrupts */
+ int tuning_run_handler_on_hwi ; /* = 1 , whether to run the hander on FIFO hardware interrupts (as well as rDMA ones) */
+ int tuning_clearthresh_slih ; /* = 1 , whether to clear the 'threshold crossed' bit in the slih */
+ int tuning_clearthresh_flih ; /* = 1 , whether to clear the 'threshold crossed' bit in the flih */
+ int tuning_disable_in_dcr ; /* = 1, whether to toggle the DCR interrupt enable/disable */
+ int tuning_injection_hashmask ; /* = 3, whether to mask down the number of injection FIFOs in use per direction */
+
+ int tuning_recfifo_threshold ; /* for moving to/from DCR */
+ int tuning_dcr_c8b ; /* for moving to/from DCR */
+ int tuning_enable_hwfifo ; /* For registering/unregistering 'hardware FIFO' interrupts */
+
+ int tuning_exploit_reversepropose ; /* Whether to try the 'reverse propose' protocol */
+ int tuning_counters_per_source ; /* How many reception counters to commit per source node */
+ int tuning_defer_skb_until_counter ; /* Whether to defer sk_buff allocation until a reception counter is available */
+ int tuning_deliver_eagerly ; /* Whether to skip the 'resequence arrivals' step */
+ int tuning_diagnose_rst ; /* Whether to cut trace records when being asked to send a TCP segment with a 'rst' */
+
+ int tuning_select_fifo_algorithm ; /* Which FIFO selection algorithm to use (head-of-line block minimisation) */
+
+ int tuning_min_icsk_timeout ; /* What to push ICSK retransmit timeout up to if we find it low */
+
+ int tuning_virtual_channel ; /* Which virtual channel to use (i.e. whether to force deterministic routing) */
+
+ unsigned int block_id ;
+ unsigned char i_am_compute_node ;
+ unsigned char bluegene_tcp_is_built ;
+ unsigned char is_torus_x ;
+ unsigned char is_torus_y ;
+ unsigned char is_torus_z ;
+ unsigned char last_queue_picked ;
+#if defined(CONFIG_BGP_STATISTICS)
+ unsigned int resequence_histogram[k_concurrent_receives] ;
+ unsigned long long bytes_sent ;
+ unsigned long long bytes_received ;
+#endif
+} dma_tcp_t ;
+
+typedef enum {
+ k_VC_ordering = DMA_PACKET_VC_BN , /* virtual channel to use when we want to order things, 'Bubble Normal' */
+ k_VC_anyway = DMA_PACKET_VC_D0 /* virtual channel to use otherwise ... 'Dynamic 0' */
+} VC_e ;
+
+/* Pick the virtual channel: the caller's hint, unless the build allows the
+ * sysctl-tunable override (k_configurable_virtual_channel). */
+static inline unsigned int virtual_channel(dma_tcp_t *dma_tcp, VC_e channel_hint)
+{
+ return k_configurable_virtual_channel ? dma_tcp->tuning_virtual_channel : channel_hint ;
+}
+
+/* Bump / read the per-flowpoint instrumentation counters.
+ * NOTE(review): dma_tcp_t.flow_counter only exists when ENABLE_FRAMES and
+ * USE_SKB_TO_SKB are both defined, but these helpers are unguarded --
+ * confirm the build always defines both when this header is compiled. */
+static inline void instrument_flow(dma_tcp_t *dma_tcp,flowpoint_e flowpoint)
+{
+ dma_tcp->flow_counter[flowpoint] += 1 ;
+}
+
+static inline unsigned int flow_count(dma_tcp_t *dma_tcp,flowpoint_e flowpoint)
+{
+ return dma_tcp->flow_counter[flowpoint] ;
+}
+
+extern dma_tcp_t dma_tcp_state ;
+
+void bgp_dma_tcp_display_pending_slots(dma_tcp_t * dma_tcp, unsigned int nodecount ) ;
+void bgp_dma_diag_reissue_rec_counters(dma_tcp_t *dma_tcp) ;
+
+void bgp_dma_tcp_empty_fifo_callback(void) ;
+
+extern void bluegene_set_cpu_for_irq(unsigned int irq, unsigned int cpu) ;
+extern void bluegene_bic_disable_irq(unsigned int irq) ;
+
+int bgnet_receive_torus(struct sk_buff * skb) ;
+int bgtornet_receive_torus(struct sk_buff * skb) ;
+struct net_device_stats *bgtornet_stats(void) ;
+
+void bgtornet_rx_schedule(void) ;
+
+
+/* Set a DMA counter's value while opening its base/max window to the whole
+ * physical address range [0, 0xffffffff], in both the software shadow and
+ * the hardware counter. Always returns 0. */
+static inline int DMA_CounterSetValueWideOpen(
+ DMA_Counter_t *c_sw,
+ unsigned int value
+ )
+{
+ unsigned int pa_base=0, pa_max=0xffffffff;
+ SPI_assert( c_sw != NULL );
+ c_sw->pa_base = pa_base;
+ c_sw->pa_max = pa_max;
+
+ /*
+ * Write the value, base, and max to the hardware counter
+ */
+ DMA_CounterSetValueBaseMaxHw(c_sw->counter_hw_ptr,
+ value,
+ pa_base,
+ pa_max);
+
+ return (0);
+}
+
+/* As DMA_CounterSetValueWideOpen, but addressing the counter by id within a
+ * counter group; asserts the id is in range and the group holds permission
+ * for that counter. */
+static inline int DMA_CounterSetValueWideOpenById(
+ DMA_CounterGroup_t *cg_ptr,
+ int counter_id ,
+ unsigned int value
+ )
+ {
+ int rc;
+
+ SPI_assert( (counter_id >= 0) && (counter_id < DMA_NUM_COUNTERS_PER_GROUP) );
+ SPI_assert( cg_ptr != NULL );
+ SPI_assert( (cg_ptr->permissions[DMA_COUNTER_GROUP_WORD_ID(counter_id)] &
+ _BN(DMA_COUNTER_GROUP_WORD_BIT_ID(counter_id))) != 0 );
+
+ rc = DMA_CounterSetValueWideOpen( &cg_ptr->counter[counter_id], value ) ;
+
+ /* Note: it is assumed that the above function call performs an MBAR */
+
+ return rc;
+
+ }
+
+/* Choose a transmission FIFO for a stream. This is 'approximately' the deterministic routing algorithm */
+/* (I think it is 'exactly' the deterministic routing algorithm, with the possible exception of what the hardware will do */
+/* if you send a packet to something half-way-round in one of the torus dimensions) */
+/* Return -1 if it is an attempted 'self-send'; this has to be done as a local DMA or a memcpy, not as a torus op */
+static int select_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x, unsigned int y, unsigned int z) __attribute__ ((unused)) ;
+/* Sign-extend the low 'bb' bits of 'd'.
+ * Fix: the previous form, (d << (32-bb)) >> (32-bb), left-shifted values
+ * whose sign bit would change (undefined behaviour in C) and relied on
+ * implementation-defined signed right shift. This version uses only
+ * unsigned arithmetic (mask, then xor/subtract the sign bit), which is
+ * fully defined and produces the same results. bb >= 32 returns d
+ * unchanged (matching the old shift-by-zero path); bb == 0 returns 0. */
+static inline int sign_extend(int d, unsigned int bb)
+{
+ unsigned int v ;
+ unsigned int signbit ;
+ if( bb >= 32 ) return d ;
+ if( bb == 0 ) return 0 ;
+ v = ((unsigned int) d) & ((1u << bb) - 1u) ;
+ signbit = 1u << (bb-1) ;
+ return (int)((v ^ signbit) - signbit) ;
+}
+/* Map a (possibly wrapped) coordinate delta to one of two direction codes:
+ * v0 for 'positive or zero', v1 for 'negative'. On a torus axis the delta
+ * is first sign-extended from 'bb' bits so the shorter way round is taken. */
+static inline int resolve_direction(int d, unsigned int is_torus, unsigned int bb, int v0, int v1)
+{
+ if( is_torus) d = sign_extend(d,bb) ;
+ return (d<0) ? v1 : v0 ;
+}
+/* Core FIFO selection between coordinates (x0,y0,z0) and (x,y,z).
+ * Algorithms 0/1: follow deterministic-route order (x first, then y, then z),
+ * returning direction codes 0/1 for x, 2/3 for y, 4/5 for z.
+ * Any other algorithm: spread by destination rank modulo 6. */
+static int select_transmission_fifo_v(dma_tcp_t *dma_tcp, unsigned int x0,unsigned int x, unsigned int y0,unsigned int y, unsigned int z0,unsigned int z)
+ {
+ switch(dma_tcp->tuning_select_fifo_algorithm)
+ {
+ case 0:
+ case 1:
+ {
+ int dx = x0-x ;
+ int dy = y0-y ;
+ int dz = z0-z ;
+ if( dx != 0 ) return resolve_direction(dx, dma_tcp->is_torus_x,dma_tcp->xbits, 1, 0) ;
+ if( dy != 0 ) return resolve_direction(dy, dma_tcp->is_torus_y,dma_tcp->ybits, 3, 2) ;
+ return resolve_direction(dz,dma_tcp->is_torus_z,dma_tcp->zbits, 5, 4) ;
+ }
+ default:
+ /* rank modulo 6 ... */
+ return ((x<<(dma_tcp->ybits+dma_tcp->zbits)) | (y<<(dma_tcp->zbits)) | (z)) % 6 ;
+
+ }
+ }
+
+/* Select the FIFO for a transmission from this node to (x,y,z).
+ * NOTE(review): the header comment above says this returns -1 for a
+ * self-send; select_transmission_fifo_v has no -1 path, so the z-direction
+ * code is returned for dx==dy==dz==0 -- confirm callers screen self-sends. */
+static int select_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x, unsigned int y, unsigned int z)
+{
+ return select_transmission_fifo_v(dma_tcp,dma_tcp->location.coordinate[0],x,dma_tcp->location.coordinate[1],y,dma_tcp->location.coordinate[2],z) ;
+}
+
+/* Report the transmission FIFO that a remote node will use to reach this node */
+static int report_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x0, unsigned int y0, unsigned int z0) __attribute__ ((unused)) ;
+static int report_transmission_fifo(dma_tcp_t *dma_tcp, unsigned int x0, unsigned int y0, unsigned int z0)
+{
+ return select_transmission_fifo_v(dma_tcp,x0,dma_tcp->location.coordinate[0],y0,dma_tcp->location.coordinate[1],z0,dma_tcp->location.coordinate[2]) ;
+}
+
+
+
+int handleSocketsRecvMsgActor(DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ ) ;
+int handleSocketsRecvMsgCompletedActor(DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ ) ;
+int handleSocketsBufferActor(DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ ) ;
+
+
+#ifdef ENABLE_PACKETS
+void dma_tcp_packets_init(dma_tcp_t *dma_tcp) ;
+int bgp_dma_tcp_send_and_free_packets( struct sk_buff *skb
+ ) ;
+void dma_tcp_packets_show_counts(dma_tcp_t *dma_tcp) ;
+
+#endif
+#ifdef ENABLE_FRAMES
+void dma_tcp_frames_init(dma_tcp_t *dma_tcp) ;
+int bgp_dma_tcp_send_and_free_frames( struct sk_buff *skb
+ ) ;
+#endif
+
+/* ethem codings are ... */
+/* 0 : run things on the tree */
+/* 1 : run things with 'actors' and DMA to/from SKBUFFs */
+/* 2 : run things with 'messages' between memory FIFOs */
+/* 3 : send both (1) and (2), for bringup. */
+/* until it's working correctly, we will deliver the '1' eth frames and discard the '2' eth frames at the receiver. */
+/* Additionally we can set a '4' bit, which will send packets over the tree; */
+/* so we could set '6' and get a working tree drive, and 'messages' flows to go through the motions on a prototype driver without any 'actors' flows */
+
+extern int bgp_dma_ethem ;
+
+/**********************************************************************
+ * DCR access wrapper
+ **********************************************************************/
+
+/* Read Device Control Register 'dcrn' via the PPC mfdcrx instruction.
+ * The "memory" clobber orders the access against surrounding code. */
+static inline uint32_t mfdcrx(uint32_t dcrn)
+{
+ uint32_t value;
+ asm volatile ("mfdcrx %0,%1": "=r" (value) : "r" (dcrn) : "memory");
+ return value;
+}
+
+/* Write 'value' to Device Control Register 'dcrn' via mtdcrx. */
+static inline void mtdcrx(uint32_t dcrn, uint32_t value)
+{
+ asm volatile("mtdcrx %0,%1": :"r" (dcrn), "r" (value) : "memory");
+}
+
+
+/* Trace the torus DMA DCR block (0xd00-0xdff), eight registers per line,
+ * at the caller-supplied trace level. Diagnostic use only. */
+static void dumpdmadcrs(unsigned int tracelevel) __attribute__ ((unused)) ;
+static void dumpdmadcrs(unsigned int tracelevel)
+ {
+ int x ;
+ for(x=0xd00; x<=0xdff ; x += 8 )
+ {
+ int d0 __attribute__ ((unused)) = mfdcrx(x) ;
+ int d1 __attribute__ ((unused)) = mfdcrx(x+1) ;
+ int d2 __attribute__ ((unused)) = mfdcrx(x+2) ;
+ int d3 __attribute__ ((unused)) = mfdcrx(x+3) ;
+ int d4 __attribute__ ((unused)) = mfdcrx(x+4) ;
+ int d5 __attribute__ ((unused)) = mfdcrx(x+5) ;
+ int d6 __attribute__ ((unused)) = mfdcrx(x+6) ;
+ int d7 __attribute__ ((unused)) = mfdcrx(x+7) ;
+ TRACEN(tracelevel,"Torus DMA dcrs 0x%04x %08x %08x %08x %08x %08x %08x %08x %08x",
+ x,d0,d1,d2,d3,d4,d5,d6,d7
+ ) ;
+ }
+ }
+
+/* Trace the torus control DCRs (0xc80-0xc8f), eight registers per line,
+ * at the fixed k_t_request trace level. Diagnostic use only. */
+static void dumptorusdcrs(void) __attribute__ ((unused)) ;
+static void dumptorusdcrs(void)
+ {
+ int x ;
+ for(x=0xc80; x<=0xc8f ; x += 8 )
+ {
+ int d0 __attribute__ ((unused)) = mfdcrx(x) ;
+ int d1 __attribute__ ((unused)) = mfdcrx(x+1) ;
+ int d2 __attribute__ ((unused)) = mfdcrx(x+2) ;
+ int d3 __attribute__ ((unused)) = mfdcrx(x+3) ;
+ int d4 __attribute__ ((unused)) = mfdcrx(x+4) ;
+ int d5 __attribute__ ((unused)) = mfdcrx(x+5) ;
+ int d6 __attribute__ ((unused)) = mfdcrx(x+6) ;
+ int d7 __attribute__ ((unused)) = mfdcrx(x+7) ;
+ TRACEN(k_t_request,"Torus control dcrs 0x%04x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ x,d0,d1,d2,d3,d4,d5,d6,d7
+ ) ;
+ }
+ }
+
+#if defined(REQUIRES_DUMPMEM)
+static inline char cfix(char x) __attribute__ ((unused)) ;
+static void dumpmem(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ;
+static void dumpframe(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ;
+
+/* Map a byte to itself if printable ASCII, else '.' (for hex+ASCII dumps). */
+static inline char cfix(char x)
+ {
+ return ( x >= 0x20 && x < 0x80 ) ? x : '.' ;
+ }
+/* Trace 'length' bytes at 'address' as hex words plus an ASCII gutter,
+ * 32 bytes per line. Diagnostic use only. */
+static void dumpmem(const void *address, unsigned int length, const char * label)
+ {
+ int x ;
+ TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"(>)Memory dump length=0x%08x: %s",length,label) ;
+ for (x=0;x<length;x+=32)
+ {
+ int *v __attribute__ ((unused)) = (int *)(address+x) ;
+ char *c __attribute__ ((unused)) = (char *)(address+x) ;
+ TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"%p: %08x %08x %08x %08x %08x %08x %08x %08x %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c",
+ v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7],
+ cfix(c[0]),cfix(c[1]),cfix(c[2]),cfix(c[3]),
+ cfix(c[4]),cfix(c[5]),cfix(c[6]),cfix(c[7]),
+ cfix(c[8]),cfix(c[9]),cfix(c[10]),cfix(c[11]),
+ cfix(c[12]),cfix(c[13]),cfix(c[14]),cfix(c[15]),
+ cfix(c[16]),cfix(c[17]),cfix(c[18]),cfix(c[19]),
+ cfix(c[20]),cfix(c[21]),cfix(c[22]),cfix(c[23]),
+ cfix(c[24]),cfix(c[25]),cfix(c[26]),cfix(c[27]),
+ cfix(c[28]),cfix(c[29]),cfix(c[30]),cfix(c[31])
+ ) ;
+ }
+ TRACEN(k_t_fifocontents|k_t_scattergather|k_t_request,"(<)Memory dump") ;
+ }
+
+/* As dumpmem, but for ethernet frames: capped at the first 1024 bytes and
+ * traced only under k_t_fifocontents. */
+static void dumpframe(const void *address, unsigned int length, const char * label)
+ {
+ int x ;
+ unsigned int limlen = (length>1024) ? 1024 : length ;
+ TRACEN(k_t_fifocontents,"(>)ethframe dump length=%d: %s",length,label) ;
+ for (x=0;x<limlen;x+=32)
+ {
+ int *v __attribute__ ((unused)) = (int *)(address+x) ;
+ char *c __attribute__ ((unused)) = (char *)(address+x) ;
+ TRACEN(k_t_fifocontents,"%p: %08x %08x %08x %08x %08x %08x %08x %08x %c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c",
+ v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7],
+ cfix(c[0]),cfix(c[1]),cfix(c[2]),cfix(c[3]),
+ cfix(c[4]),cfix(c[5]),cfix(c[6]),cfix(c[7]),
+ cfix(c[8]),cfix(c[9]),cfix(c[10]),cfix(c[11]),
+ cfix(c[12]),cfix(c[13]),cfix(c[14]),cfix(c[15]),
+ cfix(c[16]),cfix(c[17]),cfix(c[18]),cfix(c[19]),
+ cfix(c[20]),cfix(c[21]),cfix(c[22]),cfix(c[23]),
+ cfix(c[24]),cfix(c[25]),cfix(c[26]),cfix(c[27]),
+ cfix(c[28]),cfix(c[29]),cfix(c[30]),cfix(c[31])
+ ) ;
+ }
+ TRACEN(k_t_fifocontents,"(<)ethframe dump") ;
+ }
+#else
+/* No-op stubs when memory dumping is compiled out. */
+static inline void dumpmem(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ;
+static inline void dumpmem(const void *address, unsigned int length, const char * label)
+ {
+ }
+static void dumpframe(const void *address, unsigned int length, const char * label) __attribute__ ((unused)) ;
+static void dumpframe(const void *address, unsigned int length, const char * label)
+ {
+ }
+#endif
+
/* Dump the state of a DMA reception fifo group to the k_t_request trace
 * channel.  Only fifos[0] is detailed.  Lines are suffixed with "!!!" when
 * the fifo looks non-idle (free space != size, or hardware head != tail),
 * which during a quiesced dump suggests data still in flight.
 */
static void dumpRecFifoGroup(DMA_RecFifoGroup_t * recFifoGroup) __attribute__ ((unused)) ;
static void dumpRecFifoGroup(DMA_RecFifoGroup_t * recFifoGroup)
 {
 TRACEN(k_t_request,"(>)recFifoGroup=%p",recFifoGroup) ;
 if( recFifoGroup != NULL )
 {
 TRACEN(k_t_request,"group_id=%d num_normal_fifos=%d num_hdr_fifos=%d mask=%08x status_ptr=%p",
 recFifoGroup->group_id,recFifoGroup->num_normal_fifos,recFifoGroup->num_hdr_fifos,recFifoGroup->mask,recFifoGroup->status_ptr
 ) ;
 /* Group-wide status bitmaps (two words each) */
 TRACEN(k_t_request,"not_empty=%08x%08x available=%08x%08x threshold_crossed=%08x%08x",
 recFifoGroup->status_ptr->not_empty[0],recFifoGroup->status_ptr->not_empty[1],
 recFifoGroup->status_ptr->available[0],recFifoGroup->status_ptr->available[1],
 recFifoGroup->status_ptr->threshold_crossed[0],recFifoGroup->status_ptr->threshold_crossed[1]
 ) ;
 TRACEN(k_t_request,"fifos[0] global_fifo_id=%d type=%d num_packets_processed_since_moving_fifo_head=%d",
 recFifoGroup->fifos[0].global_fifo_id,
 recFifoGroup->fifos[0].type,
 recFifoGroup->fifos[0].num_packets_processed_since_moving_fifo_head
 ) ;
 /* Software (virtual-address) view of fifo 0 */
 TRACEN(k_t_request,"fifos[0] fifo_hw_ptr=%p free_space=%08x fifo_size=%08x pa_start=%08x va_start=%p va_head=%p va_tail=%p va_end=%p %s",
 recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr,
 recFifoGroup->fifos[0].dma_fifo.free_space,
 recFifoGroup->fifos[0].dma_fifo.fifo_size,
 recFifoGroup->fifos[0].dma_fifo.pa_start,
 recFifoGroup->fifos[0].dma_fifo.va_start,
 recFifoGroup->fifos[0].dma_fifo.va_head,
 recFifoGroup->fifos[0].dma_fifo.va_tail,
 recFifoGroup->fifos[0].dma_fifo.va_end,
 (recFifoGroup->fifos[0].dma_fifo.free_space != recFifoGroup->fifos[0].dma_fifo.fifo_size) ? "!!!" : ""
 ) ;
 /* Hardware (physical-address) view, if the fifo is mapped */
 if( recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr != NULL )
 {
 TRACEN(k_t_request,"hwfifos[0] pa_start=%08x pa_end=%08x pa_head=%08x pa_tail=%08x %s",
 recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_start,
 recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_end,
 recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_head,
 recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_tail,
 (recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_head != recFifoGroup->fifos[0].dma_fifo.fifo_hw_ptr->pa_tail) ? "!!!" : ""
 ) ;
 }
 }
 TRACEN(k_t_request,"(<)") ;

 }
+
/* Dump the state of a DMA injection fifo group to the k_t_request trace
 * channel.  For each fifo marked 'available' in the group status, reports
 * occupancy and head/tail state; hardware head != tail (flagged "!!!")
 * means descriptors are still queued for injection.
 */
static void dumpInjFifoGroup(DMA_InjFifoGroup_t * injFifoGroup) __attribute__ ((unused)) ;
static void dumpInjFifoGroup(DMA_InjFifoGroup_t * injFifoGroup)
 {
 TRACEN(k_t_request,"(>)injFifoGroup=%p",injFifoGroup) ;
 if( injFifoGroup != NULL )
 {
 DMA_InjFifoStatus_t *injStatus = injFifoGroup->status_ptr ;
 int x ;
 TRACEN(k_t_request,"status_ptr=%p permissions=0x%08x group_id=%d",
 injFifoGroup->status_ptr, injFifoGroup->permissions, injFifoGroup->group_id) ;
 if( injStatus)
 {
 unsigned int available = injStatus->available ;
 TRACEN(k_t_request,"status not_empty=0x%08x available=0x%08x threshold_crossed=0x%08x activated=0x%08x",
 injStatus->not_empty, available, injStatus->threshold_crossed, injStatus->activated
 ) ;
 /* Walk the per-group bitmap; bit 0x80000000 corresponds to fifo 0 */
 for( x=0; x<DMA_NUM_INJ_FIFOS_PER_GROUP; x+=1)
 {
 if( (0x80000000 >> x) & available)
 {
 DMA_InjFifo_t *fifo=injFifoGroup->fifos+x ;
 DMA_FifoHW_t *hw_ptr = fifo->dma_fifo.fifo_hw_ptr ;
 /* Only report fifos that currently hold descriptors */
 if( fifo->occupiedSize)
 {
 TRACEN(k_t_request, " fifos[%d] fifo_id=%d desc_count=0x%08x%08x occupiedSize=0x%08x priority=%d local=%d ts_inj_map=0x%02x %s",
 x, fifo->fifo_id, (unsigned int)(fifo->desc_count >> 32),(unsigned int)(fifo->desc_count), fifo->occupiedSize, fifo->priority, fifo->local, fifo->ts_inj_map,
 (fifo->occupiedSize) ? "!!!" : ""
 ) ;
 }
 /* Software view: only interesting when head and tail differ */
 if( fifo->dma_fifo.va_head != fifo->dma_fifo.va_tail)
 {
 TRACEN(k_t_request," fifos[%d] fifo_hw_ptr=%p free_space=%08x fifo_size=%08x pa_start=%08x va_start=%p va_head=%p va_tail=%p va_end=%p",
 x,
 hw_ptr,
 fifo->dma_fifo.free_space,
 fifo->dma_fifo.fifo_size,
 fifo->dma_fifo.pa_start,
 fifo->dma_fifo.va_start,
 fifo->dma_fifo.va_head,
 fifo->dma_fifo.va_tail,
 fifo->dma_fifo.va_end
 ) ;
 }
 /* Hardware view, if mapped */
 if( hw_ptr)
 {
 if( hw_ptr->pa_head != hw_ptr->pa_tail)
 {
 TRACEN(k_t_request," hwfifos[%d] pa_start=%08x pa_end=%08x pa_head=%08x pa_tail=%08x %s",
 x,
 hw_ptr->pa_start,
 hw_ptr->pa_end,
 hw_ptr->pa_head,
 hw_ptr->pa_tail,
 (hw_ptr->pa_head != hw_ptr->pa_tail) ? "!!!" : ""
 ) ;
 }
 }
 }
 }
 }
 }
 TRACEN(k_t_request,"(<)") ;
 }
+
/* Record the device MTU, rounded up so a frame occupies a whole number of
 * torus packets under both packet-size granularities, plus alignment slack.
 * The larger of the two rounded values is kept as dma_tcp->mtu, and the
 * link-payload packet count as dma_tcp->max_packets_per_frame.
 * NOTE(review): the k_* constants are defined elsewhere in the driver;
 * their values are not visible here.
 */
static void bgp_dma_tcp_set_mtu(dma_tcp_t *dma_tcp, unsigned int mtu) __attribute__ ((unused)) ;
static void bgp_dma_tcp_set_mtu(dma_tcp_t *dma_tcp, unsigned int mtu)
 {
 /* Ceiling division: packets needed at each granularity */
 unsigned int max_packets_per_frame=(mtu+k_torus_link_payload_size-1) / k_torus_link_payload_size ;
 unsigned int max_packets_per_frame2=(mtu+k_injection_packet_size-1) / k_injection_packet_size ;
 unsigned int mtu1=max_packets_per_frame * k_torus_link_payload_size + k_torus_skb_alignment ;
 unsigned int mtu2=max_packets_per_frame2 * k_injection_packet_size + k_torus_skb_alignment ;
 dma_tcp->max_packets_per_frame = max_packets_per_frame ;
 dma_tcp->mtu = (mtu1>mtu2) ? mtu1 : mtu2 ;
 }
+
+/* Test if we think a socket is affected by torus congestion */
+unsigned int bgp_torus_congestion(struct sock *sk) ;
+
+
/* Return the current stack pointer: PowerPC keeps it in GPR1, which the
 * "mr" (move register) instruction copies into 'value'. */
static inline unsigned int stack_pointer(void)
{
	uint32_t value;
	asm volatile ("mr %0,1": "=r" (value) );
	return value;
}
+
+/* Fragment reassembly control for 'frames' */
+/*
+ * When the first packet of a frame arrives, examine the eth and ip headers to allocate a skbuff which will have
+ * enough data for the frame. Arrange to assemble the first fragment into the data area.
+ *
+ * When the last packet of a fragment arrives, we know whether the frame is complete. If it is a one-frag frame,
+ * hand it off. I
+ */
+
/* Per-skbuff reassembly state, carried in skb->cb while the fragments of a
 * 'frame' arrive.  NOTE(review): assumes the struct (32 bytes here) fits in
 * the skb control block (48 bytes in this kernel era) -- confirm if fields
 * are added. */
typedef struct
{
	unsigned int frame_size ;          /* IP frame size, from IP header */
	unsigned int frag_size ;           /* fragment size */
	unsigned int frag_pad_head ;       /* Displacement of first byte of first fragment from alignment */
	unsigned int fragment_index ;      /* Index of fragment, starts at 0 */
	unsigned int bytes_accounted_for ; /* Number of bytes in accounted for including the current fragment */
	unsigned char * frag_base ;        /* Where to pack this frag down to */
	unsigned char * frag_data ;        /* First byte free after current fragment is received */
	unsigned char * frag_payload ;     /* Aligned address to drop first packet of next fragment into skb */
} fragment_reassembler;

/* View an skbuff's control block as the reassembly state above. */
static inline fragment_reassembler * frag_re(struct sk_buff *skb)
{
	return (fragment_reassembler *) &(skb->cb) ;
}
+
+void dma_tcp_show_reception(dma_tcp_t * dma_tcp) ;
+
+int proc_do_dma_rec_counters(struct ctl_table *ctl, int write, struct file * filp,
+ void __user *buffer, size_t *lenp, loff_t *ppos) ;
+extern int bgp_dma_tcp_counter_copies[DMA_NUM_COUNTERS_PER_GROUP] ;
/* Trace an injection descriptor: its four control words, the four hardware
 * header words, and the two low-order flag bits (prefetch-only, local-copy)
 * extracted from the first word via the 'di' alias. */
static void show_dma_descriptor(DMA_InjDescriptor_t *d) __attribute((unused)) ;
static void show_dma_descriptor(DMA_InjDescriptor_t *d)
{
	unsigned int * di = (unsigned int *) d ;
	TRACEN(k_t_request,"DMA_InjDescriptor_t(0x%08x 0x%08x 0x%08x 0x%08x (0x%08x 0x%08x 0x%08x 0x%08x))",
			d->word1, d->word2, d->base_offset, d->msg_length, d->hwHdr.word0, d->hwHdr.word1, d->hwHdr.word2, d->hwHdr.word3) ;
	TRACEN(k_t_request,"prefetch_only=%d local_copy=%d",(di[0] >> 1)& 1,di[0] & 1) ;
}
+
/* Accumulators for a least-squares rate estimate (sum of x*x, x*y, counts
 * and linear sums); the commented-out fields were part of an earlier fit. */
typedef struct
{
	long long int sxx ;
	long long int sxy ;
/*	long long int m0 ; */
/*	long long int m1 ; */
/*	long long int det ; */
	int s1 ;
	int sx ;
	int sy ;
} dma_statistic_t ;
extern dma_statistic_t bgp_dma_rate ;

enum {
	k_injCounterId = 0 // Injection counter number to use
} ;
+
+/* Support for freeing 'a few' skbuffs when outbound DMA is complete each time we go around */
/* A small batch of skbuffs queued for deferred freeing (at most
 * k_skb_group_count per pass; overflow is freed immediately). */
enum {
	k_skb_group_count = 8
};
typedef struct {
	unsigned int count ;                         /* number of valid entries in 'group' */
	struct sk_buff * group[k_skb_group_count] ;  /* skbs awaiting dev_kfree_skb */
} skb_group_t ;
/* Reset a batch to empty; call before each collection pass. */
static void skb_group_init(skb_group_t * skb_group) __attribute__((unused)) ;
static void skb_group_init(skb_group_t * skb_group)
{
	skb_group->count = 0 ;
}
+
+
/* Append an skb to the deferred-free batch.  If the batch is already full,
 * the skb is freed immediately (and an error is traced) rather than lost. */
static void skb_group_add(skb_group_t * skb_group, struct sk_buff * skb) __attribute__((unused)) ;
static void skb_group_add(skb_group_t * skb_group, struct sk_buff * skb)
{
	unsigned int count=skb_group->count ;
	if( count < k_skb_group_count )
	{
		skb_group->group[count] = skb ;
		TRACEN(k_t_general,"Queueing skb_group->group[%d]=%p for free",count,skb) ;
		skb_group->count = count+1 ;
	}
	else
	{
		/* Callers size their batches to fit; reaching here indicates a logic error */
		TRACEN(k_t_error,"Overrunning queue of skbs to free skb=%p",skb) ;
		dev_kfree_skb(skb) ;
	}
}
/* Free every skb queued in the batch.  Entries are NULLed as they are
 * freed; 'count' is left unchanged, so the group must be re-initialised
 * with skb_group_init() before reuse. */
static void skb_group_free(skb_group_t * skb_group) __attribute__((unused)) ;
static void skb_group_free(skb_group_t * skb_group)
{
	unsigned int count=skb_group->count ;
	unsigned int index ;
	struct sk_buff ** skb_array=skb_group->group ;
	/* Defensive clamp as well as BUG_ON: BUG_ON may be compiled out */
	BUG_ON(count > k_skb_group_count) ;
	if( count > k_skb_group_count) count=k_skb_group_count ;
	for(index=0;index<count;index+=1)
	{
		TRACEN(k_t_general,"freeing skb_array[%d]=%p",index,skb_array[index]) ;
		if( skb_array[index])
		{
			dev_kfree_skb(skb_array[index]) ;
			skb_array[index]=NULL ;
		}
	}
}
+
/* Move a contiguous run of 'count' skbs from skb_array into the deferred
 * free batch, NULLing each source slot.  When fifo-lifetime tracking is
 * compiled in, also records per-direction maximum residency and traces
 * skbs that sat in the injection fifo suspiciously long. */
static void skb_group_queue_seq(skb_group_t * group, struct sk_buff ** skb_array, unsigned int count
#if defined(TRACK_LIFETIME_IN_FIFO)
		, unsigned int core, unsigned int desired_fifo, unsigned long long now, unsigned int x
#endif
		)
{
	unsigned int index ;

	for( index=0 ; index<count; index+=1)
	{
		if( skb_array[index])
		{
#if defined(TRACK_LIFETIME_IN_FIFO)
			struct sk_buff *skb=skb_array[index] ;
			/* Injection timestamp was stashed in skb->cb when the skb was queued */
			unsigned long long lifetime_in_fifo = now - *(unsigned long long *) skb_array[index]->cb ;
			TRACEN(k_t_detail ,"core=%d desired_fifo=%d lifetime=0x%016llx",core, desired_fifo,lifetime_in_fifo) ;
			if( skb->len >= 4096 && desired_fifo < k_injecting_directions && lifetime_in_fifo > max_lifetime_by_direction[desired_fifo])
			{
				max_lifetime_by_direction[desired_fifo] = lifetime_in_fifo ;
			}
			/* Large skb stuck for > 2^31 timebase ticks: trace it as likely congestion */
			if( skb->len >= 4096 && lifetime_in_fifo > 0x7fffffff)
			{
				struct sock *sk=skb->sk ;
				struct inet_sock *inet = inet_sk(sk);
				struct inet_connection_sock *icsk = inet_csk(sk);
				unsigned int daddr=inet->daddr ;
				unsigned int flags = TCP_SKB_CB(skb)->flags ;
				TRACEN(k_t_congestion,"sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u x=%d in-fifo-time=0x%016llx",
						sk, skb, skb->data, skb->len, flags,
						daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
						x+index,
						lifetime_in_fifo
						) ;
			}
#endif
			skb_group_add(group,skb_array[index]) ;
			skb_array[index] = NULL ;
		}
	}
}
/* Queue 'count' skbs for freeing starting at index 'start' of a circular
 * array of k_injection_packet_count entries.  A run that wraps the end of
 * the ring is handled as two contiguous sub-runs. */
static void skb_group_queue(skb_group_t * group, struct sk_buff ** skb_array, unsigned int start, unsigned int count
#if defined(TRACK_LIFETIME_IN_FIFO)
		, unsigned int core, unsigned int desired_fifo, unsigned long long now
#endif
		) __attribute__ ((unused)) ;
static void skb_group_queue(skb_group_t * group, struct sk_buff ** skb_array, unsigned int start, unsigned int count
#if defined(TRACK_LIFETIME_IN_FIFO)
		, unsigned int core, unsigned int desired_fifo, unsigned long long now
#endif
		)
{
	TRACEN(k_t_detail , "Queuing skbs for freeing start=%d count=%d", start, count) ;
	if( start+count <= k_injection_packet_count)
	{
		/* No wrap: one contiguous run */
		skb_group_queue_seq(group,skb_array+start, count
#if defined(TRACK_LIFETIME_IN_FIFO)
				, core, desired_fifo, now, 0
#endif
				) ;
	}
	else
	{
		/* Wraps the ring: tail of the array, then the remainder from index 0 */
		skb_group_queue_seq(group,skb_array+start, k_injection_packet_count-start
#if defined(TRACK_LIFETIME_IN_FIFO)
				, core, desired_fifo, now,0
#endif
				) ;
		skb_group_queue_seq(group,skb_array, count - (k_injection_packet_count-start)
#if defined(TRACK_LIFETIME_IN_FIFO)
				, core, desired_fifo, now,k_injection_packet_count-start
#endif
				) ;
	}

}
+
+/* We will be using the injection machinery as circular buffers; this is the 'circle' function */
/* Reduce an index modulo the injection ring size.  The mask form requires
 * k_injection_packet_count to be a power of two. */
static inline unsigned int packet_mod(unsigned int index)
 {
 return index & (k_injection_packet_count-1) ;
 }
+
+/* Try to minimise the 'needless' spins if several cores try to inject contemporaneously -- not anymore, best not to overtake on a path */
/* Pick one of up to 4 injection groups from the target coordinates, so
 * different destinations tend to use different fifos.  The tunable
 * hashmask can collapse the choice (e.g. mask 0 forces group 0). */
static inline int injection_group_hash(dma_tcp_t *dma_tcp,int x,int y, int z)
{
/*	return 0 ; */
	return ( x/2 + y/2 + z/2 ) & 3 & (dma_tcp->tuning_injection_hashmask);
}
+
+#if defined(BARRIER_WITH_IOCTL)
+
/* Accumulate a histogram of intervals between successive calls, bucketed
 * by the bit position (fls) of the timebase-low delta, i.e. log2 scale. */
static inline void timing_histogram(dma_tcp_t * dma_tcp)
{
	int current_tbl=get_tbl() ;
	int delta_tbl=current_tbl-dma_tcp->prev_tbl ;
	dma_tcp->timing_histogram_buckets[fls(delta_tbl)] += 1 ;
	dma_tcp->prev_tbl = current_tbl ;

}
+#endif
+
+
+static inline int wrapped_DMA_InjFifoInjectDescriptorById(
+ DMA_InjFifoGroup_t *fg_ptr,
+ int fifo_id,
+ DMA_InjDescriptor_t *desc
+ )
+{
+ int rc ;
+ rc = DMA_InjFifoInjectDescriptorById(fg_ptr,fifo_id,desc) ;
+ return rc ;
+}
+
+
+
+/* #define AUDIT_HEADLEN */
+/* #define TRACK_LIFETIME_IN_FIFO */
+
/* Per-skb control-block layout used on the transmit side: the injection
 * descriptor, optional auditing/timing fields, and a flag saying whether
 * the skb should be freed once the DMA completes. */
typedef struct
{
	DMA_InjDescriptor_t desc ;            /* descriptor injected for this frame */
#if defined(TRACK_LIFETIME_IN_FIFO)
	unsigned long long injection_timestamp ;  /* timebase when queued */
#endif
#if defined(AUDIT_HEADLEN)
	unsigned short tot_len ;              /* copy of IP tot_len for cross-checking */
#endif
	char free_when_done ;                 /* nonzero: free skb after DMA completes */
} frame_injection_cb ;
extern unsigned int tot_len_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ; // TODO: fix the name if we leave it extern ...

#if defined(AUDIT_FRAME_HEADER)
/* Snapshot of eth+ip headers taken when a reception counter is assigned,
 * compared later to detect header trampling. */
typedef struct {
	struct ethhdr eth ;
	struct iphdr iph ;
} frame_header_t ;
extern frame_header_t all_headers_in_counters[DMA_NUM_COUNTERS_PER_GROUP] ; // TODO: fix the name if we leave it extern ...
#endif
+
/* Diagnostic dump for one in-use reception counter 'x': traces the skb's
 * eth/ip header fields, cross-checks tot_len against the value recorded at
 * proposal time, and (optionally) verifies the header bytes have not been
 * trampled since first seen. */
static void dma_tcp_show_reception_one(dma_tcp_t * dma_tcp, unsigned int x, unsigned int counter_value) __attribute__((unused)) ;
static void dma_tcp_show_reception_one(dma_tcp_t * dma_tcp, unsigned int x, unsigned int counter_value)
{
	struct sk_buff *skb=dma_tcp->rcv_skbs[x] ;
	if( skb)
	{
		struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
		unsigned int eth_proto = eth->h_proto ;

		struct iphdr *iph = (struct iphdr *) (eth+1) ;
		unsigned int tot_len=iph->tot_len ;
		unsigned int saddr=iph->saddr ;
		/* tot_len was recorded when the counter was assigned; a mismatch means
		 * something overwrote the frame header while reception was in flight */
		if( tot_len != tot_len_for_rcv[x])
		{
			TRACEN(k_t_error,"(!!!) tot_len trampled") ;
		}

		TRACEN(k_t_request,"(---) skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d slot=0x%08x conn_id=0x%02x tot_len_for_rcv=0x%04x counter_value=0x%04x",
				skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff, dma_tcp->slot_for_rcv[x], dma_tcp->conn_for_rcv[x], tot_len_for_rcv[x],counter_value
				) ;
		dumpmem(skb->data,0x42,"eth-ip-tcp header") ;
		show_dma_descriptor((DMA_InjDescriptor_t *)&skb->cb) ;
#if defined(AUDIT_FRAME_HEADER)
		if(memcmp(skb->data,((char *)(all_headers_in_counters+x)),32))
		{
			TRACEN(k_t_request,"(!!!) header not as first seen") ;
			dumpmem(skb->data-14,sizeof(frame_header_t),"header-now") ;
			dumpmem(all_headers_in_counters+x,sizeof(frame_header_t),"header-in-propose") ;

		}
#endif
	}
	else
	{
		TRACEN(k_t_error|k_t_request,"(E) x=%d Counter in use but no skb !",x) ;
	}

}
+
+void __init
+dma_tcp_diagnose_init(dma_tcp_t *dma_tcp) ;
+
+void __init
+bgp_dma_memcpyInit(dma_tcp_t *dma_tcp) ;
+
+void __init
+dma_tcp_devfs_procfs_init(dma_tcp_t *dma_tcp) ;
+
+#if defined(TRACK_LIFETIME_IN_FIFO)
+extern unsigned long long max_lifetime_by_direction[k_injecting_directions] ;
+#endif
+
+#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS)
+extern int tcp_scattergather_frag_limit ;
+#endif
+
+typedef struct { unsigned char c[240] ; } torus_frame_payload ;
+
+#endif
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c b/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c
new file mode 100644
index 00000000000000..931ae5365f1502
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp_diagnose.c
@@ -0,0 +1,707 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *
+ *
+ * Intent: Carry torus packets as messages into memory FIFOs, and interpret them
+ * as eth frames for TCP
+ * Later on, add token-based flow control with a view to preventing
+ * congestion collapse as the machine gets larger and the loading gets higher
+ *
+ ********************************************************************/
+#define REQUIRES_DUMPMEM
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <asm/bitops.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+
+
+
+/* #include "bglink.h" */
+#include <spi/linux_kernel_spi.h>
+
+#include <asm/time.h>
+
+/* #define CONFIG_BLUEGENE_TORUS_TRACE */
+/* #define CRC_CHECK_FRAMES */
+#define VERIFY_TARGET
+/* #define SIDEBAND_TIMESTAMP */
+#include "bgp_dma_tcp.h"
+
+
+
+
+/* void bgp_dma_diag_reissue_rec_counters(dma_tcp_t *dma_tcp) */
+/* { */
+/* unsigned int x; */
+/* for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1) */
+/* { */
+/* struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; */
+/* if( skb) */
+/* { */
+/* frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ; */
+/* TRACEN(k_t_general,"Redriving x=%d skb=%p",x,skb) ; */
+/* inject_dma_descriptor_propose_accept(dma_tcp,0,&ficb->desc) ; */
+/* } */
+/* } */
+/* } */
+
/* Walk the transmit mux (every node slot x connection id) and trace each
 * skb still held awaiting transmission, with its IP length and destination.
 * NOTE(review): iph->tot_len/daddr are traced as read, i.e. presumably in
 * network byte order -- diagnostic output only, so not converted. */
static inline void show_tx_skbs(tx_t *tx, unsigned int node_count)
{
	unsigned int slot_index ;
	unsigned int conn_id ;
	unsigned int tx_skb_count = 0 ;
	for(slot_index=0;slot_index<node_count;slot_index += 1)
	{
		for( conn_id=0;conn_id < k_connids_per_node;conn_id += 1)
		{
			struct sk_buff * skb=get_tx_skb(tx,slot_index,conn_id) ;
			if(skb)
			{
				struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
				struct iphdr *iph = (struct iphdr *) (eth+1) ;
				unsigned int tot_len=iph->tot_len ;
				unsigned int daddr=iph->daddr ;
				tx_skb_count += 1 ;

				TRACEN(k_t_request,"(---) slot_index=0x%08x conn_id=0x%02x skb=%p tot_len=0x%04x daddr=%d.%d.%d.%d",
						slot_index,conn_id,skb,tot_len,daddr>>24, (daddr >> 16) & 0xff,(daddr >> 8) & 0xff, daddr & 0xff) ;
			}
		}
	}
	TRACEN(k_t_request,"tx_skb_count=%d",tx_skb_count) ;
}
+
+void dma_tcp_show_reception(dma_tcp_t * dma_tcp)
+{
+ int x ;
+ int slot ;
+ unsigned int inUseCount = 0 ;
+ TRACEN(k_t_request,"rec hitZero 0x%08x 0x%08x",DMA_CounterGetHitZero(&dma_tcp->recCounterGroup,0),DMA_CounterGetHitZero(&dma_tcp->recCounterGroup,1)) ;
+ for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1)
+ {
+ bgp_dma_tcp_counter_copies[x] = DMA_CounterGetValueNoMsync(dma_tcp->recCounterGroup.counter+x) ;
+ if( bgp_dma_tcp_counter_copies[x] != 0 || dma_tcp->recCntrInUse[x] != 0)
+ {
+ inUseCount += 1 ;
+ TRACEN(k_t_request,"rec_counter[0x%02x] value=0x%08x inUse=%d", x,bgp_dma_tcp_counter_copies[x],dma_tcp->recCntrInUse[x]) ;
+ if(dma_tcp->recCntrInUse[x])
+ {
+ dma_tcp_show_reception_one(dma_tcp,x,bgp_dma_tcp_counter_copies[x]) ;
+/* struct sk_buff *skb=dma_tcp->rcv_skbs[x] ; */
+/* if( skb) */
+/* { */
+/* struct ethhdr *eth = (struct ethhdr *)(skb->data) ; */
+/* unsigned int eth_proto = eth->h_proto ; */
+/* */
+/* struct iphdr *iph = (struct iphdr *) (eth+1) ; */
+/* unsigned int tot_len=iph->tot_len ; */
+/* unsigned int saddr=iph->saddr ; */
+/* if( tot_len != tot_len_for_rcv[x]) */
+/* { */
+/* TRACEN(k_t_error,"(!!!) tot_len trampled") ; */
+/* } */
+/* */
+/* TRACEN(k_t_request,"(---) skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d slot=0x%08x conn_id=0x%02x tot_len_for_rcv=0x%04x", */
+/* skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff, dma_tcp->slot_for_rcv[x], dma_tcp->conn_for_rcv[x], tot_len_for_rcv[x] */
+/* ) ; */
+/* dumpmem(skb->data,0x42,"eth-ip-tcp header") ; */
+/* show_dma_descriptor((DMA_InjDescriptor_t *)&skb->cb) ; */
+/* #if defined(AUDIT_FRAME_HEADER) */
+/* if(memcmp(skb->data,((char *)(all_headers_in_counters+x)),32)) */
+/* { */
+/* TRACEN(k_t_request,"(!!!) header not as first seen") ; */
+/* dumpmem(skb->data-14,sizeof(frame_header_t),"header-now") ; */
+/* dumpmem(all_headers_in_counters+x,sizeof(frame_header_t),"header-in-propose") ; */
+/* */
+/* } */
+/* #endif */
+/* } */
+/* else */
+/* { */
+/* TRACEN(k_t_error|k_t_request,"(E) x=%d Counter in use but no skb !",x) ; */
+/* } */
+ }
+ }
+ }
+ TRACEN(k_t_request,"inUseCount=%d",inUseCount) ;
+ show_tx_skbs(&dma_tcp->tx_mux,dma_tcp->node_count) ;
+ TRACEN(k_t_request,"skb_queue_len(pending_rcv_skbs)=%d",skb_queue_len(&dma_tcp->balancer.b[0].pending_rcv_skbs)) ;
+ {
+ struct sk_buff *skb = skb_peek(&dma_tcp->balancer.b[0].pending_rcv_skbs) ;
+ if(skb)
+ {
+
+ struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+ unsigned int eth_proto = eth->h_proto ;
+
+ struct iphdr *iph = (struct iphdr *) (eth+1) ;
+ unsigned int tot_len=iph->tot_len ;
+ unsigned int saddr=iph->saddr ;
+ TRACEN(k_t_request,"skb=%p eth_proto=0x%04x tot_len=0x%04x saddr=%d.%d.%d.%d",skb,eth_proto,tot_len,saddr>>24, (saddr >> 16) & 0xff,(saddr >> 8) & 0xff, saddr & 0xff ) ;
+ }
+
+ }
+ for( slot=0;slot<dma_tcp->node_count; slot+=1)
+ {
+ unsigned int proposals_active=get_proposals_active(&dma_tcp->rcvdemux,slot) ;
+ unsigned int count_pending_f=count_pending_flow(&dma_tcp->rcvdemux,slot) ;
+ unsigned int located_counters=0 ;
+ if( proposals_active || count_pending_f )
+ {
+ TRACEN(k_t_request,"slot=0x%08x proposals_active=%d count_pending_flow=%d",slot,proposals_active,count_pending_f) ;
+ }
+ for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1)
+ {
+ struct sk_buff *skb=dma_tcp->rcv_skbs[x] ;
+ if ( skb && slot == dma_tcp->slot_for_rcv[x] )
+ {
+ located_counters += 1 ;
+ }
+ }
+ if( located_counters + count_pending_f != proposals_active || ( 0 == located_counters && count_pending_f > 0 ))
+ {
+ TRACEN(k_t_request|k_t_error,"(E) slot=0x%08x located_counters=%d count_pending_f=%d proposals_active=%d",
+ slot,located_counters,count_pending_f,proposals_active) ;
+ }
+
+ }
+}
+
/* sysctl handler: dump the reception-side diagnostics to the trace
 * facility as a side effect, then delegate the actual read/write of the
 * integer vector to proc_dointvec. */
int proc_do_dma_rec_counters(struct ctl_table *ctl, int write, struct file * filp,
	     void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int rc ;
	dma_tcp_show_reception(&dma_tcp_state ) ;
	TRACEN(k_t_entryexit,"(>)ctl=%p write=%d len=%d", ctl,write,*lenp) ;
	rc = proc_dointvec(ctl,write,filp,buffer,lenp,ppos) ;
	TRACEN(k_t_entryexit,"(<)") ;
	return rc ;

}
+
+/* Routine to report how full the outgoing FIFOs are */
+void bgp_dma_diag_report_transmission_queue(int __user * report)
+ {
+ dma_tcp_t *dma_tcp = &dma_tcp_state ;
+ unsigned int core ;
+ TRACEN(k_t_general,"report=%p",report) ;
+ for( core=0 ; core<k_injecting_cores; core += 1)
+ {
+ unsigned int desired_fifo ;
+ for(desired_fifo=0; desired_fifo<k_injecting_directions; desired_fifo += 1 )
+ {
+ unsigned int fifo_initial_head = dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].fifo_initial_head ;
+ unsigned int fifo_current_head =
+ (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+ unsigned int fifo_current_tail =
+ (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+ unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
+ unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
+ unsigned int current_injection_used=packet_mod(tailx-headx) ;
+ put_user(current_injection_used, report) ;
+ report += 1 ;
+ TRACEN(k_t_detail,"core=%d desired_fifo=%d current_injection_used=%d",core,desired_fifo,current_injection_used) ;
+
+ }
+
+
+ }
+ put_user(dma_tcp->qtyFreeRecCounters, report) ;
+ report += 1 ;
+ put_user(flow_count(dma_tcp,k_send_propose_rpc)-flow_count(dma_tcp,k_act_accept_rpc), report) ;
+ report += 1 ;
+ put_user(flow_count(dma_tcp,k_act_propose_rpc)-flow_count(dma_tcp,k_send_accept_rpc), report) ;
+ }
/* Handle one 'diagnose' torus packet: trace its source key, put offset,
 * length, and first four payload words.  Always returns 0 (packet consumed). */
static int issueDiagnose(
		DMA_RecFifo_t        *f_ptr,
		DMA_PacketHeader_t   *packet_ptr,
		dma_tcp_t * dma_tcp,
		void *  request ,
		int payload_bytes,
		unsigned int src_key,
		int Put_Offset
    )
	{
		unsigned int *payload=(unsigned int *)request ;
		TRACEN(k_t_request,"src_key=0x%08x Put_Offset=0x%08x payload_bytes=0x%02x [%08x %08x %08x %08x]",
				src_key,Put_Offset, payload_bytes,payload[0],payload[1],payload[2],payload[3]) ;
		return 0 ;
	}
+
/* Reception-fifo callback registered for diagnose packets: unpacks the
 * software argument and put offset from the packet header and forwards to
 * issueDiagnose with recv_func_parm recovered as the driver state. */
static int issueDiagnoseActor(DMA_RecFifo_t *f_ptr,
			 DMA_PacketHeader_t *packet_ptr,
			 void  *recv_func_parm,
			 char  *payload_ptr,
			 int    payload_bytes
			 )
	{
		unsigned int SW_Arg=packet_ptr->SW_Arg ;
		int Put_Offset=packet_ptr->Put_Offset ;
		enable_kernel_fp() ;  // TODO: don't think this is needed nowadays

		TRACEN(k_t_detail,"recv_func_parm=%p payload_ptr=%p SW_Arg=0x%08x payload_bytes=0x%08x Put_Offset=0x%08x",
				recv_func_parm,payload_ptr,SW_Arg,payload_bytes,Put_Offset) ;
		return issueDiagnose(
				f_ptr,
				packet_ptr,
				(dma_tcp_t *) recv_func_parm,
				(void *) payload_ptr,
				payload_bytes,
				SW_Arg,
				Put_Offset
		    ) ;
	}
/* Build and inject one memory-FIFO descriptor carrying a diagnostic 'sync'
 * message of 'length' bytes to node (x,y,z).  The buffer is DMA-mapped for
 * device reads; 'proto_start' selects the receiving actor function.
 * Returns the number of descriptors injected (always 1).
 * NOTE(review): the dma_map_single mapping is never unmapped here --
 * presumably acceptable for this diagnostic path on this platform; confirm. */
static inline int inject_into_dma_diag_sync(dma_tcp_t *dma_tcp, void * address, unsigned int length, unsigned int x, unsigned int y, unsigned int z, unsigned int my_injection_group, unsigned int desired_fifo, unsigned int SW_Arg ,
		unsigned int proto_start )
	{
		dma_addr_t dataAddr ;
		DMA_InjDescriptor_t desc;
		int ret1, ret2 __attribute__((unused));
		unsigned int firstpacketlength = length ;
		TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo);
		dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);

/*		 First injection is 'start of frame/fragment' */
		ret1 = DMA_TorusMemFifoDescriptor( &desc,
						x, y, z,
						k_ReceptionFifoGroup,	  /* recv fifo grp id */
						0,	  /* hints */
						k_VC_anyway,	  /* vc - adaptive */
						SW_Arg, /* softw arg */
						proto_start,	  /* function id */
						k_InjectionCounterGroup,	  /* inj cntr group id */
						k_injCounterId,	  /* inj counter id */
						dataAddr,   /* send address */
						firstpacketlength	 /* msg len */
						);

#if defined(SIDEBAND_TIMESTAMP)
		{
			/* Pack a timebase sample into the high half of the (otherwise unused) put offset */
			unsigned long now_lo=get_tbl() ;
			DMA_DescriptorSetPutOffset(&desc,((-length) & 0x0000ffff ) | (now_lo & 0xffff0000)) ;

		}
#else
		DMA_DescriptorSetPutOffset(&desc,-length) ;  /* For 'memory FIFO packets', the put offset has no hardware use. Set it to indicate the message (fragment) length */
#endif
		ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
					 dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
					 &desc );
		TRACEN(k_t_general , "(<)proto_start=%d firstpacketlength=%d ret1=%d ret2=%d",proto_start,firstpacketlength,ret1, ret2);
		return 1 ;
	}
+
/* Send a diagnostic 'sync' block of sendBytes from dma_tcp->diag_block_buffer
 * to node (x,y,z), using the same ring/credit discipline as the normal
 * transmit path: take the per-direction injection lock, check there is room
 * in the injection ring, inject, then queue any now-completed skbs from the
 * ring tail for freeing outside the lock. */
static void bgp_dma_diag_drive_sync_at(dma_tcp_t *dma_tcp, int x,int y,int z, int sendBytes)
{
	unsigned int desired_fifo= select_transmission_fifo(dma_tcp,x,y,z) ;
	unsigned long flags ;
	unsigned int current_injection_used=0xffffffff ;
	unsigned int aligned_payload_address = (unsigned int)dma_tcp->diag_block_buffer ;
	unsigned int aligned_payload_length = sendBytes ;
	unsigned int pad_head = 0 ;

	int ret = 0;
	int ring_ok ;

	int my_injection_group ;
	skb_group_t skb_group ;
	TRACEN(k_t_general ,"(>) at (%02x,%02x,%02x)", x,y,z);
	skb_group_init(&skb_group) ;

	my_injection_group=injection_group_hash(dma_tcp,x,y,z) ;
	spin_lock_irqsave(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
	{
		unsigned int src_key = (dma_tcp->src_key << 6) | (my_injection_group << 4) | pad_head ;
		idma_direction_t * buffer = dma_tcp->idma.idma_core[my_injection_group].idma_direction+desired_fifo ;
		/* Set up the payload */
		unsigned int bhx = buffer->buffer_head_index ;
		unsigned int lastx = packet_mod(bhx) ;
		/* Ring occupancy from the hardware head/tail, in 32-byte descriptor units */
		unsigned int fifo_initial_head = dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].fifo_initial_head ;
		unsigned int fifo_current_head =
			(unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
		unsigned int fifo_current_tail =
			(unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
		unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
		unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
		unsigned int injection_count ;
#if defined(TRACK_LIFETIME_IN_FIFO)
		/* NOTE(review): 'skb' is not declared in this function; this branch
		 * would not compile if TRACK_LIFETIME_IN_FIFO were defined -- confirm */
		unsigned long long now=get_powerpc_tb() ;
		*(unsigned long long*)(skb->cb) = now ;
#endif
		current_injection_used=packet_mod(tailx-headx) ;
		/* If the network is backing up, we may have to skip out here, */
		/* so that we don't overwrite unsent data. */
		TRACEN(k_t_general ,"Runway desired_fifo=%d headx=%d tailx=%d bhx=%d current_injection_used=%d",
				desired_fifo,headx,tailx,bhx,current_injection_used) ;
		if( current_injection_used > buffer->injection_high_watermark )
		{
			buffer->injection_high_watermark=current_injection_used ;  /* Congestion statistic */
		}
		{
			/* Need to have room to inject the in-skbuff data plus all attached 'fragments', each of which may be sent in 3 injections */
			if( current_injection_used+3*(MAX_SKB_FRAGS+1) < k_injection_packet_count-1)
			{
				ring_ok = 1 ;
				TRACEN(k_t_general,"Runway slot granted") ;
			}
			else
			{
				ring_ok = 0 ;
				TRACEN(k_t_congestion,"Runway slot denied tailx=%08x headx=%08x",tailx,headx) ;
			}
		}
		TRACEN(k_t_general ,"Injection my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x",
				my_injection_group, desired_fifo, bhx, headx,tailx
				) ;
		if ( ring_ok )
		{
			/* We are going to send something. Display its protocol headers .. */

			/* Bump the injection counter. Actually only needs doing once per 4GB or so */
			ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId, 0xffffffff );

			/* and inject it */
			{

				injection_count = inject_into_dma_diag_sync(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,my_injection_group,desired_fifo,
						src_key,
						dma_tcp->proto_issue_diag_sync
						) ;



			}
			{
				unsigned int nhx=packet_mod(bhx+injection_count) ;
				/* Record the skbuff so it can be freed later, after data is DMA'd out */
				dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array[nhx] = NULL ;
				/* Remember where we will be pushing the next injection in */
				buffer->buffer_head_index = nhx ;
			}
			/* hang on to the skbs until they are sent ... */
			if( current_injection_used != 0xffffffff)
			{
				unsigned int btx = buffer->buffer_tail_index ;  /* This indexes the oldest skbuff that might still be pending send by the DMA unit */
				int skql2 = packet_mod(bhx-btx) ;
				int count_needing_freeing = skql2-current_injection_used ;
				int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ;
				TRACEN(k_t_detail ,"current_injection_used=%d btx=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,btx,skql2,count_needing_freeing,count_to_free);
				skb_group_queue(&skb_group,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free
#if defined(TRACK_LIFETIME_IN_FIFO)
						, my_injection_group, desired_fifo, now
#endif
						) ;
				btx = packet_mod(btx+count_to_free) ;
				buffer->buffer_tail_index = btx ;
				TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index);
			}
		}
		else
		{
			TRACEN(k_t_congestion,"Would overrun my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x lastx=%08x",
					my_injection_group, desired_fifo, bhx, headx,tailx, lastx
					) ;
		}
	}
	spin_unlock_irqrestore(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
	/* Free outside the lock; optionally kick the timer to reap stragglers */
	skb_group_free(&skb_group) ;
	if( k_async_free ) mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ;

	TRACE("(<) desired_fifo=%d",desired_fifo);

}
+/* Fill shuffle_vector with one entry per node of an xe*ye*ze torus, in
+ * lexicographic (x, y, z) order.  Each entry packs the coordinates as
+ * (x<<16)|(y<<8)|z, so y and z extents are assumed to fit in 8 bits.
+ */
+static void init_shuffle_vector(unsigned int * shuffle_vector, unsigned int xe, unsigned int ye, unsigned int ze)
+{
+ unsigned int x;
+ unsigned int y;
+ unsigned int z;
+ for( x=0; x<xe; x+=1)
+ {
+ for(y=0;y<ye;y+=1)
+ {
+ for( z=0;z<ze;z+=1)
+ {
+ *shuffle_vector = (x<<16)|(y<<8)|z ;
+ shuffle_vector += 1 ;
+ }
+ }
+ }
+}
+
+/* Advance a 32-bit PRBS shift register one step using the CRC-32-IEEE
+ * 802.3 polynomial (see http://en.wikipedia.org/wiki/Cyclic_redundancy_check).
+ * Reworked in unsigned arithmetic: left-shifting a negative signed int is
+ * undefined behaviour, and '>> 31' of a negative signed int is only
+ * implementation-defined.  The result matches the original intent on
+ * arithmetic-shift targets.
+ */
+static inline int next_prbs(int seed)
+{
+ unsigned int useed = (unsigned int) seed ;
+ unsigned int ncmask = (useed & 0x80000000u) ? 0xffffffffu : 0u ; /* 0x00000000 or 0xffffffff */
+ return (int)((useed << 1) ^ (0x04C11DB7u & ncmask)) ;
+}
+
+/* Run the PRBS forward 32 steps, so that seeds derived from nearby input
+ * values (e.g. adjacent node numbers) become well decorrelated.
+ */
+static int scatter_prbs(int seed)
+{
+ int a ;
+ for(a=0;a<32;a+=1)
+ {
+ seed=next_prbs(seed) ;
+ }
+ return seed ;
+}
+/* Permute the node-coordinate vector by swapping each position with a
+ * PRBS-chosen partner.  b = seed & (vsize-1) is always < vsize, but the
+ * permutation is only unbiased when vsize is a power of two — presumably
+ * acceptable for this diagnostic use; TODO confirm for non-power-of-two
+ * partitions.  Returns the advanced seed so successive calls produce
+ * fresh orderings.
+ */
+static int shuffle_shuffle_vector(unsigned int * shuffle_vector, unsigned int xe, unsigned int ye, unsigned int ze, int seed)
+{
+ unsigned int vsize = xe*ye*ze ;
+ unsigned int vmask = vsize-1 ;
+ unsigned int a ;
+
+ for( a=0; a<vsize;a+=1)
+ {
+ unsigned int b = (seed & vmask) ;
+ unsigned int va = shuffle_vector[a] ;
+ unsigned int vb = shuffle_vector[b] ;
+ shuffle_vector[a] = vb ;
+ shuffle_vector[b] = va ;
+ seed=next_prbs(seed) ;
+
+ }
+ return seed ;
+}
+#if 0
+/* (Compiled out by the enclosing '#if 0'.)
+ * Drive a 'diagnostic block' transfer of sendBytes to every node in the
+ * partition except this one, visiting targets in a freshly shuffled order
+ * to spread network load.
+ */
+void dma_tcp_transfer_activate(int sendBytes)
+{
+ dma_tcp_t *dma_tcp = &dma_tcp_state ;
+ int a ;
+ int my_x=dma_tcp->location.coordinate[0] ;
+ int my_y=dma_tcp->location.coordinate[1] ;
+ int my_z=dma_tcp->location.coordinate[2] ;
+ int ext_x=dma_tcp->extent.coordinate[0] ;
+ int ext_y=dma_tcp->extent.coordinate[1] ;
+ int ext_z=dma_tcp->extent.coordinate[2] ;
+ int vsize=ext_x*ext_y*ext_z ;
+ /* Push the 'diagnostic block' through the DMA unit */
+ TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
+ dma_tcp->shuffle_seed = shuffle_shuffle_vector(dma_tcp->shuffle_vector,ext_x,ext_y,ext_z,dma_tcp->shuffle_seed) ;
+ for(a=0;a<vsize;a+=1)
+ {
+ unsigned int tg=dma_tcp->shuffle_vector[a] ;
+ unsigned int tg_x=tg>>16 ;
+ unsigned int tg_y=(tg>>8) & 0xff ;
+ unsigned int tg_z=tg & 0xff ;
+ TRACEN(k_t_detail,"shuffle_vector[%d]=0x%08x",a,dma_tcp->shuffle_vector[a]) ;
+ if( my_x != tg_x || my_y != tg_y || my_z != tg_z )
+ {
+ bgp_dma_diag_drive_block_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ;
+ }
+ }
+}
+
+/* (Compiled out by the enclosing '#if 0'.)
+ * Drive a 'diagnostic block' transfer of sendBytes to the single node
+ * whose coordinates are packed in tg as (x<<16)|(y<<8)|z, unless that
+ * node is this one.
+ */
+void dma_tcp_transfer_activate_to_one(int sendBytes, unsigned int tg)
+{
+ dma_tcp_t *dma_tcp = &dma_tcp_state ;
+ int my_x=dma_tcp->location.coordinate[0] ;
+ int my_y=dma_tcp->location.coordinate[1] ;
+ int my_z=dma_tcp->location.coordinate[2] ;
+ /* Push the 'diagnostic block' through the DMA unit */
+ TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x tg=0x%08x",sendBytes,tg) ;
+ {
+ unsigned int tg_x=tg>>16 ;
+ unsigned int tg_y=(tg>>8) & 0xff ;
+ unsigned int tg_z=tg & 0xff ;
+ if( my_x != tg_x || my_y != tg_y || my_z != tg_z )
+ {
+ bgp_dma_diag_drive_block_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ;
+ }
+ }
+}
+#endif
+/* Drive a diagnostic 'sync' transfer of sendBytes to every other node in
+ * the partition, visiting targets in a freshly shuffled order; completions
+ * are counted by the sync receive handler and awaited via
+ * dma_tcp_transfer_wait_sync().
+ */
+void dma_tcp_transfer_activate_sync(int sendBytes)
+{
+ dma_tcp_t *dma_tcp = &dma_tcp_state ;
+ int a ;
+ int my_x=dma_tcp->location.coordinate[0] ;
+ int my_y=dma_tcp->location.coordinate[1] ;
+ int my_z=dma_tcp->location.coordinate[2] ;
+ int ext_x=dma_tcp->extent.coordinate[0] ;
+ int ext_y=dma_tcp->extent.coordinate[1] ;
+ int ext_z=dma_tcp->extent.coordinate[2] ;
+ int vsize=ext_x*ext_y*ext_z ;
+ /* Push the 'diagnostic block' through the DMA unit */
+ TRACEN(k_t_general,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
+ dma_tcp->shuffle_seed = shuffle_shuffle_vector(dma_tcp->shuffle_vector,ext_x,ext_y,ext_z,dma_tcp->shuffle_seed) ;
+ for(a=0;a<vsize;a+=1)
+ {
+ unsigned int tg=dma_tcp->shuffle_vector[a] ;
+ unsigned int tg_x=tg>>16 ;
+ unsigned int tg_y=(tg>>8) & 0xff ;
+ unsigned int tg_z=tg & 0xff ;
+ TRACEN(k_t_detail,"shuffle_vector[%d]=0x%08x",a,dma_tcp->shuffle_vector[a]) ;
+ if( my_x != tg_x || my_y != tg_y || my_z != tg_z )
+ {
+ bgp_dma_diag_drive_sync_at(dma_tcp,tg_x,tg_y,tg_z,sendBytes) ;
+ }
+ }
+}
+
+/* 'across faces' transfer in x,y,z directions, as a 'towards peak performance' test */
+#if 0
+/* (Compiled out by the enclosing '#if 0'.)
+ * Drive diagnostic block transfers to the three immediate neighbours that
+ * differ from this node in exactly one coordinate bit (x^1, y^1, z^1) —
+ * an 'across faces' towards-peak-performance test.
+ */
+void dma_tcp_transfer_activate_minicube(int sendBytes)
+{
+ dma_tcp_t *dma_tcp = &dma_tcp_state ;
+ int my_x=dma_tcp->location.coordinate[0] ;
+ int my_y=dma_tcp->location.coordinate[1] ;
+ int my_z=dma_tcp->location.coordinate[2] ;
+ /* Push the 'diagnostic block' through the DMA unit */
+ TRACEN(k_t_request,"diagnostic transfer request, sendBytes=0x%08x",sendBytes) ;
+ bgp_dma_diag_drive_block_at(dma_tcp,my_x^1,my_y,my_z,sendBytes) ;
+ bgp_dma_diag_drive_block_at(dma_tcp,my_x,my_y^1,my_z,sendBytes) ;
+ bgp_dma_diag_drive_block_at(dma_tcp,my_x,my_y,my_z^1,sendBytes) ;
+}
+
+/* (Compiled out by the enclosing '#if 0'.)
+ * Sleep in 1-tick intervals until at least demandCount diagnostic 'end'
+ * packets have been counted, up to ~100 ticks or until the sleep is
+ * interrupted (nonzero schedule_timeout return).  Returns nonzero if the
+ * demanded count was reached.
+ */
+int dma_tcp_transfer_wait(int demandCount)
+{
+ int spincount = 0 ;
+ TRACEN(k_t_request,"(>) demandCount=%d",demandCount) ;
+ while(DiagEndCount < demandCount && spincount < 100 )
+ {
+ int rc ;
+ set_current_state(TASK_INTERRUPTIBLE);
+ rc=schedule_timeout(1) ;
+ if( 0 != rc) break ;
+ spincount += 1 ;
+ }
+ TRACEN(k_t_request,"(<) DiagEndCount=%d spincount=%d",DiagEndCount,spincount) ;
+ return DiagEndCount >= demandCount ;
+}
+#endif
+#if defined(BARRIER_WITH_IOCTL)
+volatile static int DiagSyncCount ;
+
+/* Receive-side handler for a diagnostic 'sync' packet: records a timing
+ * histogram sample and counts the arrival in DiagSyncCount, which the
+ * waiter polls.  The packet contents are otherwise ignored.
+ */
+static int issueInlineFrameDiagSync(
+ DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ dma_tcp_t * dma_tcp,
+ void * request ,
+ int payload_bytes,
+ unsigned int src_key,
+ int Put_Offset
+ )
+ {
+ timing_histogram(dma_tcp) ;
+ DiagSyncCount += 1 ;
+ return 0 ;
+ }
+
+/* Adapter registered with the DMA SPI reception-FIFO dispatcher: unpacks
+ * the software argument and put-offset from the packet header, traces
+ * them, and forwards to issueInlineFrameDiagSync().
+ */
+static int issueInlineFrameDiagSyncActor(DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ )
+ {
+ unsigned int SW_Arg=packet_ptr->SW_Arg ;
+ int Put_Offset=packet_ptr->Put_Offset ;
+
+ enable_kernel_fp() ; // TODO: don't think this is needed nowadays
+ TRACEN(k_t_detail,"recv_func_parm=%p payload_ptr=%p SW_Arg=0x%08x payload_bytes=0x%08x Put_Offset=0x%08x",
+ recv_func_parm,payload_ptr,SW_Arg,payload_bytes,Put_Offset) ;
+ return issueInlineFrameDiagSync(
+ f_ptr,
+ packet_ptr,
+ (dma_tcp_t *) recv_func_parm,
+ (void *) payload_ptr,
+ payload_bytes,
+ SW_Arg,
+ Put_Offset
+ ) ;
+ }
+
+#endif
+
+int dma_tcp_transfer_wait_sync(int demandCount)
+{
+ int spincount = 0 ;
+ TRACEN(k_t_general,"(>) demandCount=%d",demandCount) ;
+ while(DiagSyncCount < demandCount && spincount < 100 )
+ {
+ int rc ;
+ set_current_state(TASK_INTERRUPTIBLE);
+ rc=schedule_timeout(1) ;
+ if( 0 != rc) break ;
+ spincount += 1 ;
+ }
+ TRACEN(k_t_general,"(<) DiagSyncCount=%d spincount=%d",DiagSyncCount,spincount) ;
+ return DiagSyncCount >= demandCount ;
+}
+
+void dma_tcp_transfer_clearcount(void)
+{
+ TRACEN(k_t_general,"count cleared") ;
+/* DiagEndCount = 0 ; */
+ DiagSyncCount = 0 ;
+}
+
+/* One-time (boot) initialisation of the diagnostic machinery: allocates
+ * the diagnostic transfer buffer and the node shuffle vector, seeds the
+ * PRBS, zeroes the timing histogram, and registers the receive actors for
+ * diagnostic 'sync' and 'diagnose' packets with the DMA SPI.
+ */
+void __init
+dma_tcp_diagnose_init(dma_tcp_t *dma_tcp)
+ {
+#if defined(BARRIER_WITH_IOCTL)
+ dma_tcp->diag_block_buffer=allocate_diag_block_buffer() ;
+ dma_tcp->shuffle_vector=allocate_shuffle_vector(dma_tcp->extent.coordinate[0],dma_tcp->extent.coordinate[1],dma_tcp->extent.coordinate[2]) ;
+ dma_tcp->shuffle_seed = scatter_prbs(dma_tcp->SW_Arg + 1) ;
+ init_shuffle_vector(dma_tcp->shuffle_vector,dma_tcp->extent.coordinate[0],dma_tcp->extent.coordinate[1],dma_tcp->extent.coordinate[2]) ;
+ dma_tcp->proto_issue_diag_sync=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDiagSyncActor, dma_tcp, 0, 0);
+ memset(dma_tcp->timing_histogram_buckets,0,33*sizeof(int)) ;
+#endif
+ dma_tcp->proto_diagnose=DMA_RecFifoRegisterRecvFunction(issueDiagnoseActor, dma_tcp, 0, 0);
+
+ }
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_frames.c b/drivers/net/bgp_torus/bgp_dma_tcp_frames.c
new file mode 100644
index 00000000000000..3e9fd7715756db
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp_frames.c
@@ -0,0 +1,2712 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for sockets over torus
+ *
+ *
+ * Intent: Carry torus packets as messages into memory FIFOs, and interpret them
+ * as eth frames for TCP
+ * Later on, add token-based flow control with a view to preventing
+ * congestion collapse as the machine gets larger and the loading gets higher
+ *
+ ********************************************************************/
+#define REQUIRES_DUMPMEM
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <asm/bitops.h>
+#include <linux/vmalloc.h>
+
+#include <linux/dma-mapping.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp.h>
+
+
+
+/* #include "bglink.h" */
+#include <spi/linux_kernel_spi.h>
+
+#include <asm/time.h>
+
+/* #define CONFIG_BLUEGENE_TORUS_TRACE */
+/* #define CRC_CHECK_FRAMES */
+#define VERIFY_TARGET
+/* #define SIDEBAND_TIMESTAMP */
+
+#include "bgp_dma_tcp.h"
+#include "bgp_bic_diagnosis.h"
+
+
+/* Hand a fully reassembled ethernet frame up to the torus network device.
+ * When statistics are configured, first account the IP total length.
+ * NOTE(review): iph->tot_len is the on-wire (big-endian) field used
+ * without ntohs — harmless on this big-endian PowerPC target, but confirm
+ * if ever built elsewhere.
+ */
+static inline void frames_receive_torus(dma_tcp_t *dma_tcp,struct sk_buff * skb)
+{
+#if defined(CONFIG_BGP_STATISTICS)
+ struct ethhdr *eth = (struct ethhdr *) (skb->data) ;
+ struct iphdr *iph=(struct iphdr *) (eth+1) ;
+ dma_tcp->bytes_received += iph->tot_len ;
+#endif
+ bgtornet_receive_torus(skb);
+}
+
+#if defined(TRACK_LIFETIME_IN_FIFO)
+unsigned long long max_lifetime_by_direction[k_injecting_directions] ;
+#endif
+
+/* Trace the scatter/gather layout of an skb: total length, bytes in the
+ * linear head, and the size of each attached page fragment (up to the
+ * first three).  Optionally hex-dumps the start of the head area.
+ */
+static void diag_skb_structure(struct sk_buff *skb)
+{
+ int f=skb_shinfo(skb)->nr_frags ;
+ if(0 == f)
+ {
+ TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=0 [0x%04x]",skb->len, skb->data_len, skb_headlen(skb)) ;
+ }
+ else if(1 == f)
+ {
+ TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=1 [0x%04x 0x%04x]",skb->len, skb->data_len, skb_headlen(skb),
+ skb_shinfo(skb)->frags[0].size
+ ) ;
+ }
+ else if(2 == f)
+ {
+ TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=2 [0x%04x 0x%04x 0x%04x]",skb->len, skb->data_len, skb_headlen(skb),
+ skb_shinfo(skb)->frags[0].size,
+ skb_shinfo(skb)->frags[1].size
+ ) ;
+ }
+ else
+ {
+ /* Fixed: the argument list was shifted by one against the format
+ * string (nr_frags was printed where len belonged, and so on).
+ */
+ TRACEN(k_t_sgdiag,"len=0x%04x data_len=0x%04x frags=%d [0x%04x 0x%04x 0x%04x 0x%04x ..]",
+ skb->len, skb->data_len, f, skb_headlen(skb),
+ skb_shinfo(skb)->frags[0].size,
+ skb_shinfo(skb)->frags[1].size,
+ skb_shinfo(skb)->frags[2].size
+ ) ;
+ }
+ if( TRACING(k_t_sgdiag_detail))
+ {
+ unsigned int dump_length = ( skb_headlen(skb) < 256 ) ? skb_headlen(skb) : 256 ;
+ dumpmem(skb->data, dump_length, "skb_head") ;
+ }
+}
+
+/* Copy one full torus packet payload by structure assignment (lets the
+ * compiler pick the widest available copy sequence).  Always returns 0.
+ */
+static inline int torus_frame_payload_memcpy(
+ torus_frame_payload * target,
+ torus_frame_payload * source
+ )
+{
+ *target = *source ;
+ return 0 ;
+}
+
+/* This is as per the powerpc <asm/time.h> 'get_tb' */
+/* Dup'd here because we have to compile with ppc also, which doesn't have it defined */
+/* Reads the upper half twice so a carry between the two 32-bit reads is
+ * detected rather than producing a wildly wrong 64-bit value. */
+static inline u64 get_powerpc_tb(void)
+{
+ unsigned int tbhi, tblo, tbhi2;
+
+ tbhi = get_tbu();
+ tblo = get_tbl();
+ tbhi2 = get_tbu();
+ /* tbhi2 might be different from tbhi, but that would indicate that there had been a 32-bit carry.
+ * In that case (tbhi2,0) would be a reasonable representation of the timestamp that we usually
+ * think of as being (tbhi,tblo)
+ */
+ if( tbhi == tbhi2)
+ {
+ return ((u64)tbhi << 32) | tblo;
+ }
+ return ((u64)tbhi2 << 32) ;
+}
+static void display_skb_structure(struct sk_buff *skb) ;
+
+static torus_frame_payload dummy_payload __attribute__((aligned(16)));
+/* Return a receive-demux slot to its idle state: payload pointers aimed
+ * at the harmless dummy buffer, the 'expect' count poisoned to 0xffffffff,
+ * and no skb attached.
+ */
+static inline void demux_vacate_slot(dma_tcp_t * dma_tcp, unsigned int slot)
+ {
+ set_rcv_payload(&dma_tcp->rcvdemux, slot, (char *)&dummy_payload);
+ set_rcv_payload_alert(&dma_tcp->rcvdemux, slot, (char *)&dummy_payload);
+ set_rcv_expect(&dma_tcp->rcvdemux, slot, 0xffffffff);
+ set_rcv_skb(&dma_tcp->rcvdemux, slot, NULL);
+ TRACEN(k_t_general,"Slot %d vacated", slot );
+ }
+
+/* Diagnostic check that a demux slot is vacant (matches the state set by
+ * demux_vacate_slot); traces an error with the full slot state if not.
+ */
+static inline void demux_show_slot(dma_tcp_t * dma_tcp, unsigned int slot)
+ {
+ void *payload = get_rcv_payload(&dma_tcp->rcvdemux, slot);
+ void *alert = get_rcv_payload_alert(&dma_tcp->rcvdemux, slot);
+ unsigned int expect=get_rcv_expect(&dma_tcp->rcvdemux, slot);
+ struct sk_buff *skb=get_rcv_skb(&dma_tcp->rcvdemux, slot);
+ if( payload != &dummy_payload || expect != 0xffffffff || skb )
+ {
+ TRACEN(k_t_error,"(E) not-vacant slot=%08x (%d %d) payload=%p alert=%p expect=0x%08x skb=%p",
+ slot, slot>>2, slot&3, payload, alert, expect, skb
+ ) ;
+ }
+ }
+
+static void init_demux_table(dma_tcp_t * dma_tcp, unsigned int node_count ) ;
+
+/* Vacate every receive-demux slot (k_slots_per_node slots per node) and,
+ * when latency tracking is configured, clear the per-slot latency
+ * statistics.
+ */
+static void init_demux_table(dma_tcp_t * dma_tcp, unsigned int node_count )
+ {
+ unsigned int x ;
+ for( x = 0 ; x < k_slots_per_node*node_count ; x += 1)
+ {
+ demux_vacate_slot(dma_tcp,x) ;
+#if defined(ENABLE_LATENCY_TRACKING)
+ rcv_statistic_clear(&(dma_tcp->rcvdemux.rcv_per_slot_vector[x].latency));
+/* set_min_latency(&dma_tcp->rcvdemux, x, 0x7fffffff) ; */
+/* set_max_latency(&dma_tcp->rcvdemux, x, 0x80000000) ; */
+#endif
+ }
+ }
+
+
+/* Trace (at 'request' level) the first 17 32-bit words of a frame being
+ * transmitted, i.e. its protocol headers.
+ */
+static void show_protocol_header_tx(char * frame) __attribute__ ((unused)) ;
+static void show_protocol_header_tx(char * frame)
+ {
+ int * f = (int *) frame ;
+ TRACEN(k_t_request,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x",
+ f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16]
+ );
+ }
+
+/* Trace (at 'error' level) the first 17 32-bit words of a frame involved
+ * in a fault, i.e. its protocol headers.
+ */
+static void show_protocol_header_fault(char * frame) __attribute__ ((unused)) ;
+static void show_protocol_header_fault(char * frame)
+ {
+ int * f = (int *) frame ;
+ TRACEN(k_t_error,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x",
+ f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16]
+ );
+ }
+
+/* Trace (at 'general' level) the first 17 32-bit words of a received
+ * frame, i.e. its protocol headers.
+ */
+static void show_protocol_header_rx(char * frame) __attribute__ ((unused)) ;
+static void show_protocol_header_rx(char * frame)
+ {
+ int * f = (int *) frame ;
+ TRACEN(k_t_general,"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x",
+ f[0],f[1],f[2],f[3],f[4],f[5],f[6],f[7],f[8],f[9],f[10],f[11],f[12],f[13],f[14],f[15],f[16]
+ );
+ }
+
+/* Polynomial picked as CRC-32-IEEE 802.3 from http://en.wikipedia.org/wiki/Cyclic_redundancy_check */
+/* Fold a received frame into a 32-bit shift-and-xor checksum (CRC-32-IEEE
+ * 802.3 polynomial) and trace it, so both ends of a transfer can be
+ * compared.  Reworked in unsigned arithmetic: left-shifting a signed int
+ * with the top bit set is undefined behaviour.  The word count is kept
+ * signed so a negative 'length' yields zero iterations instead of an
+ * enormous unsigned loop bound.
+ */
+static int frametrace_rx(char * address, int length ) __attribute__ ((unused)) ;
+static int frametrace_rx(char * address, int length )
+ {
+ unsigned int * a = (unsigned int *) address ;
+ int words = length/(int)sizeof(int) ;
+ int x ;
+ unsigned int csum32 = a[0] ;
+ for(x=1;x<words;x+=1)
+ {
+ csum32 = (csum32 << 1 ) ^ a[x] ^ ( (csum32 & 0x80000000u) ? 0x04C11DB7u : 0u ) ;
+ }
+ TRACEN(k_t_general,"address=%p length=%d csum32=0x%08x",address,length,csum32) ;
+ return (int) csum32 ;
+ }
+
+/* Transmit-side twin of frametrace_rx: fold an outgoing frame into the
+ * same 32-bit shift-and-xor checksum and trace it.  Same undefined-
+ * behaviour fix (unsigned accumulator) and signed loop bound as the
+ * receive version.
+ */
+static int frametrace_tx(char * address, int length ) __attribute__ ((unused)) ;
+static int frametrace_tx(char * address, int length )
+ {
+ unsigned int * a = (unsigned int *) address ;
+ int words = length/(int)sizeof(int) ;
+ int x ;
+ unsigned int csum32 = a[0] ;
+ for(x=1;x<words;x+=1)
+ {
+ csum32 = (csum32 << 1 ) ^ a[x] ^ ( (csum32 & 0x80000000u) ? 0x04C11DB7u : 0u ) ;
+ }
+ TRACEN(k_t_general,"address=%p length=%d csum32=0x%08x",address,length,csum32) ;
+ return (int) csum32 ;
+ }
+
+/* For diagnosis, put the local clock into the packet. Drop 4 lsbs off the 64-bit clock. */
+/* For diagnosis, put the local clock into the packet. Drop 4 lsbs off the 64-bit clock. */
+/* Reads TBU twice to detect a carry between the two 32-bit halves; on a
+ * carry the low part is reported as 0 (same idiom as get_powerpc_tb). */
+static unsigned int latency_timestamp(void) __attribute__ ((unused)) ;
+static unsigned int latency_timestamp(void)
+ {
+ unsigned int tbu = get_tbu() ;
+ unsigned int tbl = get_tbl() ;
+ unsigned int tbu2 = get_tbu() ;
+ unsigned int tbl2 = (tbu==tbu2) ? tbl : 0 ;
+ return (tbu2 << 28) | (tbl2 >> 4) ;
+ }
+
+
+
+/* Update latency statistics from a TCP timestamp option pair (both in
+ * jiffies): tsecr gives a round-trip time against our own clock; tsval
+ * gives a one-way transit time — meaningful here only because all nodes
+ * presumably share a synchronised jiffies base, hence the >= 0 guard.
+ */
+static void spot_examine_tcp_timestamp(int tsval, int tsecr)
+{
+ if( tsecr != 0 )
+ {
+ int rtt=jiffies-tsecr ;
+ TRACEN(k_t_general,"rtt=%d",rtt) ;
+#if defined(CONFIG_BGP_STATISTICS)
+ rtt_histogram[fls(rtt)] += 1 ;
+#endif
+ }
+ if( tsval != 0 )
+ {
+ int transit=jiffies-tsval ;
+ TRACEN(k_t_general,"transit=%d",transit) ;
+#if defined(CONFIG_BGP_STATISTICS)
+ if( transit >= 0)
+ {
+ transit_histogram[fls(transit)] += 1 ;
+ }
+#endif
+ }
+
+}
+
+/* Recognise the standard 'aligned' NOP-NOP-TIMESTAMP option layout (the
+ * same pattern the Linux TCP fast path checks for) and feed the two
+ * stamps to the latency-statistics code.
+ */
+static void spot_parse_aligned_timestamp(struct tcphdr *th)
+{
+ __be32 *ptr = (__be32 *)(th + 1);
+ int tsecr ;
+ int tsval ;
+ if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
+ | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
+ ++ptr;
+ tsval = ntohl(*ptr);
+ ++ptr;
+ tsecr = ntohl(*ptr);
+#if defined(CONFIG_BGP_TORUS)
+ spot_examine_tcp_timestamp(tsval,tsecr) ;
+#endif
+ }
+}
+
+/* Fast TCP option scan: no options means nothing to do; a header exactly
+ * one aligned-timestamp longer than the base header is checked for the
+ * timestamp option.  Any other option layout is ignored.
+ */
+static void spot_fast_parse_options(struct sk_buff *skb, struct tcphdr *th)
+{
+ if (th->doff == sizeof(struct tcphdr) >> 2) {
+ return;
+ } else if (
+ th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+ spot_parse_aligned_timestamp( th) ;
+ }
+}
+
+/* When flag statistics are configured, count the TCP flag bits of an
+ * incoming IP/TCP frame and scan its options for timestamps.  No-op
+ * otherwise.  NOTE(review): h_proto/protocol are compared against host-
+ * order constants without ntohs — fine on this big-endian target, but
+ * confirm if ever built elsewhere.
+ */
+static inline void analyse_tcp_flags(dma_tcp_t * dma_tcp,struct sk_buff * skb)
+{
+#if defined(KEEP_TCP_FLAG_STATS)
+ struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+ struct iphdr *iph = (struct iphdr *)(eth+1) ;
+ unsigned int * iph_word = (unsigned int *) iph ;
+ struct tcphdr * tcph = (struct tcphdr *)(iph_word+(iph->ihl)) ;
+ unsigned int eth_proto = eth->h_proto ;
+ unsigned int ip_proto = iph->protocol ;
+ if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP )
+ {
+ unsigned int flag_fin = tcph->fin ;
+ unsigned int flag_syn = tcph->syn ;
+ unsigned int flag_rst = tcph->rst ;
+ unsigned int flag_psh = tcph->psh ;
+ unsigned int flag_ack = tcph->ack ;
+ unsigned int flag_urg = tcph->urg ;
+ unsigned int flag_ece = tcph->ece ;
+ unsigned int flag_cwr = tcph->cwr ;
+ dma_tcp->tcp_received_flag_count[7] += flag_fin ;
+ dma_tcp->tcp_received_flag_count[6] += flag_syn ;
+ dma_tcp->tcp_received_flag_count[5] += flag_rst ;
+ dma_tcp->tcp_received_flag_count[4] += flag_psh ;
+ dma_tcp->tcp_received_flag_count[3] += flag_ack ;
+ dma_tcp->tcp_received_flag_count[2] += flag_urg ;
+ dma_tcp->tcp_received_flag_count[1] += flag_ece ;
+ dma_tcp->tcp_received_flag_count[0] += flag_cwr ;
+ spot_fast_parse_options(skb,tcph) ;
+ }
+
+#endif
+}
+
+/* Tunable: nonzero means frames are handed to the stack as they arrive,
+ * bypassing the per-connection resequencing in deliver_from_slot().
+ */
+static inline int deliver_eagerly(const dma_tcp_t * dma_tcp)
+{
+ return dma_tcp->tuning_deliver_eagerly ;
+}
+/*
+ * Frames from a source generally arrive in the order that they left the sender, but it is possible for some
+ * nondeterminism to be introduced because of adaptive routing and because 'short' frames get sent 'eagerly' rather than
+ * with DMA.
+ * It is desireable to deliver frames for a given TCP session in-order, otherwise the network layer may call for a
+ * 'fast' retransmit (thinking that a frame has been lost). This routine defers out-of-order frames until they can be
+ * presnted in-order.
+ */
+/* Deliver an skb for (slot, conn_id) to the network layer, in connection-id
+ * order unless 'eager' delivery is configured.  An in-order arrival is
+ * passed straight through, followed by any parked successors that are now
+ * contiguous; an out-of-order arrival is parked in the per-slot
+ * resequencing ring.  If the ring wraps (a new arrival lands on an
+ * occupied position, suggesting a loss) everything pending is flushed in
+ * order to recover.  In eager mode slot/conn_id may be -1 (single-packet
+ * frames) and the skb is delivered immediately.
+ */
+static void deliver_from_slot(dma_tcp_t * dma_tcp, unsigned int slot, unsigned int conn_id, struct sk_buff * skb)
+{
+ if( ! deliver_eagerly(dma_tcp))
+ {
+ unsigned int slot_conn=get_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot) ;
+ unsigned int slot_advancement= (conn_id-slot_conn) & (k_concurrent_receives-1) ;
+ TRACEN(k_t_general,"slot=0x%08x conn_id=0x%08x slot_conn=0x%08x skb=%p slot_advancement=%d",slot,conn_id,slot_conn,skb,slot_advancement) ;
+ #if defined(CONFIG_BGP_STATISTICS)
+ dma_tcp->resequence_histogram[slot_advancement] += 1;
+ #endif
+ if( 0 == slot_advancement)
+ {
+ /* 'oldest' skb has arrived. Deliver it */
+ frames_receive_torus(dma_tcp,skb) ;
+ /* and check if any 'arrivals ahead' can be delivered now */
+ {
+ int x ;
+ struct sk_buff * slot_skb ;
+ for(x=1; x<k_concurrent_receives-1 && (NULL != (slot_skb = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x))); x+=1)
+ {
+ TRACEN(k_t_general,"Delivering slot=0x%08x conn_id=0x%08x skb=%p",slot,slot_conn+x,slot_skb) ;
+ frames_receive_torus(dma_tcp,slot_skb) ;
+ set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x,NULL) ;
+ }
+ set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+x) ;
+ }
+ }
+ else
+ {
+ struct sk_buff * slot_skb_old = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id);
+ TRACEN(k_t_general,"Queuing slot=0x%08x conn_id=0x%08x skb=%p skb->len=%d slot_skb_old=%p",slot,conn_id,skb,skb->len,slot_skb_old) ;
+ if( slot_skb_old)
+ {
+ /* Wrapped around all the possible reorder slots. Something seems to have gone missing. */
+ TRACEN(k_t_error,"(E) resequence buffer wrapped, skb=%p conn_id=0x%08x. Delivering ",skb,conn_id) ;
+ /* and check if any 'arrivals ahead' can be delivered now */
+ {
+ int x ;
+ struct sk_buff * slot_skb ;
+ for(x=0; x<k_concurrent_receives-1 && (NULL != (slot_skb = get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x))); x+=1)
+ {
+ TRACEN(k_t_general,"Delivering slot=0x%08x conn_id=0x%08x skb=%p",slot,slot_conn+x,slot_skb) ;
+ frames_receive_torus(dma_tcp,slot_skb) ;
+ set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x,NULL) ;
+ }
+ set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+x) ;
+ slot_conn = slot_conn+x ;
+ }
+ if( 0 == ((slot_conn-conn_id) & (k_concurrent_receives-1)))
+ {
+ /* Everything is delivered ... */
+ frames_receive_torus(dma_tcp,skb) ;
+ set_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot,slot_conn+1) ;
+ }
+ else
+ {
+ /* There's another gap, save the skb for future delivery */
+ set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id,skb) ;
+ }
+
+
+ }
+ else
+ {
+ set_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,conn_id,skb) ;
+ }
+
+ }
+ }
+ else
+ {
+ TRACEN(k_t_general,"slot=0x%08x conn_id=0x%08x skb=%p",slot,conn_id,skb) ;
+ if( TRACING(k_t_sgdiag_detail))
+ {
+ unsigned int dump_length = ( skb_headlen(skb) < 256 ) ? skb_headlen(skb) : 256 ;
+ dumpmem(skb->data, dump_length, "received skb") ;
+ }
+ frames_receive_torus(dma_tcp,skb) ;
+ }
+
+}
+
+/* Trace every skb still parked in one slot's resequencing ring (frames
+ * waiting for an earlier conn_id to arrive), including the source IP from
+ * each frame's headers.  No-op unless RESEQUENCE_ARRIVALS is configured.
+ */
+static void display_pending_slot(dma_tcp_t * dma_tcp,unsigned int slot)
+{
+#if defined(RESEQUENCE_ARRIVALS)
+ unsigned int slot_conn=get_rcv_conn_pending_delivery(&dma_tcp->rcvdemux,slot) ;
+ int x ;
+ int pending_count=0;
+ for(x=0; x<k_concurrent_receives; x+=1)
+ {
+ struct sk_buff * skb=get_rcv_skb_pending_resequence(&dma_tcp->rcvdemux,slot,slot_conn+x) ;
+ if(skb)
+ {
+ struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+ struct iphdr *iph = (struct iphdr *) (eth+1) ;
+ unsigned int saddr=iph->saddr ;
+ pending_count += 1;
+ TRACEN(k_t_request,
+ "(---) Pending slot=0x%08x slot_conn=0x%02x x=%d skb=%p skb->len=%d tot_len=0x%04x saddr=%d.%d.%d.%d\n",
+ slot,slot_conn & (k_concurrent_receives-1),x,skb,skb->len, iph->tot_len,
+ saddr>>24,
+ (saddr >> 16) & 0xff,
+ (saddr >> 8) & 0xff,
+ saddr & 0xff
+ ) ;
+ }
+ }
+ if( pending_count >0 )
+ {
+ TRACEN(k_t_request,"slot=0x%08x pending_count=%d",slot,pending_count) ;
+ }
+
+#endif
+}
+
+/* Trace the resequencing state of every slot up to nodecount — a
+ * whole-driver diagnostic sweep over display_pending_slot().
+ */
+void bgp_dma_tcp_display_pending_slots(dma_tcp_t * dma_tcp, unsigned int nodecount )
+{
+ unsigned int slot ;
+ for( slot=0; slot<nodecount; slot+=1 )
+ {
+ display_pending_slot(dma_tcp,slot) ;
+ }
+}
+
+
+/* Handle an 'eager' packet carrying a complete (small) eth frame.  The
+ * low 4 bits of src_key give the alignment padding ahead of the eth
+ * header.  Only IP frames fitting entirely in one packet are accepted;
+ * the payload is copied into a freshly aligned skb and handed to the
+ * delivery path (slot/conn -1: no resequencing for single packets).
+ * Allocation failures and oversize/non-IP frames are counted against the
+ * device statistics and dropped.
+ */
+static void issueInlineFrameDataSingle(dma_tcp_t * dma_tcp,
+ void * request ,
+ unsigned int src_key ,
+ int payload_bytes)
+ {
+ unsigned int pad_head = src_key & 0x0f ;
+ TRACEN(k_t_detail | k_t_general,"(>)(%08x)", src_key);
+ if( k_dumpmem_diagnostic)
+ {
+ dumpmem(request,payload_bytes,"issueInlineFrameData") ;
+ }
+ {
+/* We have a packet which represents a complete frame; quite a small frame ... */
+ struct ethhdr *eth = (struct ethhdr *) (request+pad_head) ;
+ struct iphdr *iph = (struct iphdr *)(request+pad_head+sizeof(struct ethhdr)) ;
+ if( eth->h_proto == ETH_P_IP)
+ {
+ unsigned int totlen=iph->tot_len ;
+ int bytes_remaining = totlen+sizeof(struct ethhdr)+pad_head-payload_bytes ;
+ TRACEN(k_t_detail,"Frame total length=%d",totlen) ;
+ if( bytes_remaining <= 0)
+ {
+/* Largest amount of data we might need is ... */
+/* k_injection_packet_size+k_torus_skb_alignment */
+ struct sk_buff * skb = alloc_skb(k_injection_packet_size+k_torus_skb_alignment , GFP_ATOMIC);
+ if(skb )
+ {
+ char * payload ;
+ skb_reserve(skb, k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment);
+ payload = skb->data ;
+/* TODO: rewrite with 'memcpy' or a copy through integer regs, to avoid using FP now this is 'rare' */
+/* torus_frame_payload_load(request) ; */
+/* torus_frame_payload_store(payload) ; */
+ torus_frame_payload_memcpy((torus_frame_payload *)payload,(torus_frame_payload *)request) ;
+ TRACEN(k_t_detail,"(=)(%08x) skb=%p payload=%p bytes_remaining=%d", src_key,skb,skb->data,bytes_remaining);
+ skb_reserve(skb,pad_head) ;
+ skb_put(skb,totlen+sizeof(struct ethhdr)) ;
+ analyse_tcp_flags(dma_tcp, skb) ;
+ deliver_from_slot(dma_tcp,-1,-1,skb) ;
+ }
+ else
+ {
+ TRACEN(k_t_protocol,"(E) (%08x) skb was null", src_key);
+ dma_tcp->device_stats->rx_dropped += 1;
+ if( k_detail_stats)
+ {
+ dma_tcp->count_no_skbuff += 1 ;
+ }
+ }
+ }
+ else
+ {
+ TRACEN(k_t_protocol,"(E) frame does not fit packet, discarded");
+ dma_tcp->device_stats->rx_frame_errors += 1;
+ }
+ }
+ else
+ {
+ TRACEN(k_t_protocol,"Packet not IP ethhdr=[%02x:%02x:%02x:%02x:%02x:%02x][%02x:%02x:%02x:%02x:%02x:%02x](%04x)",
+ eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5],
+ eth->h_source[0],eth->h_source[1],eth->h_source[2],eth->h_source[3],eth->h_source[4],eth->h_source[5],
+ eth->h_proto) ;
+ dma_tcp->device_stats->rx_frame_errors += 1;
+ }
+ }
+ TRACEN(k_t_detail,"(<)((%08x)", src_key);
+ }
+
+/* Adapter registered with the DMA SPI reception-FIFO dispatcher for
+ * single-packet eager frames: unpacks the software argument and forwards
+ * to issueInlineFrameDataSingle().
+ */
+static int issueInlineFrameDataSingleActor(DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ )
+ {
+ unsigned int SW_Arg=packet_ptr->SW_Arg ;
+/* enable_kernel_fp() ; // TODO: don't think this is needed nowadays */
+
+ issueInlineFrameDataSingle(
+ (dma_tcp_t *) recv_func_parm,
+ (void *) payload_ptr,
+ SW_Arg,
+ payload_bytes
+ ) ;
+ return 0 ;
+ }
+
+#if defined(USE_ADAPTIVE_ROUTING)
+/* Per-skb reassembly state for adaptively-routed multi-packet frames,
+ * kept in the skb control buffer (skb->cb): conn_id/packet_count identify
+ * the frame, packets_to_go counts arrivals still expected,
+ * framestart_offset tracks the lowest (most negative) offset seen, and
+ * prev_offset supports the reordering statistics.
+ */
+typedef struct
+{
+ unsigned int conn_id ;
+ unsigned int packet_count ;
+ unsigned int packets_to_go ;
+ int framestart_offset ;
+ int prev_offset ; /* For constructing 'reordering' statistics */
+} adaptive_skb_cb_t;
+
+/* Reassemble one packet of an adaptively-routed multi-packet frame.
+ * Put_Offset packs conn_id (top 7 bits), packet_count (next 9 bits) and a
+ * negative offset-from-frame-end (low 16 bits); src_key packs the sender
+ * slot and the head padding.  Packets of the same (slot, conn_id)
+ * accumulate into one skb (allocated on first arrival, whatever its
+ * order); when the last expected packet lands the skb is trimmed to the
+ * earliest offset seen and handed to the in-order delivery path.  A
+ * conn_id/packet_count mismatch on an existing skb is treated as a stale
+ * reassembly and discarded.
+ */
+static void issueInlineFrameDataAdaptive(dma_tcp_t * dma_tcp,
+ void * request ,
+ unsigned int src_key ,
+ int payload_bytes,
+ int Put_Offset
+ )
+ {
+ unsigned int conn_id = ((unsigned int) Put_Offset) >> 25 ;
+ unsigned int packet_count = (((unsigned int) Put_Offset) >> 16) & 0x1ff ;
+ int offset_in_frame = (Put_Offset & 0xfff0) | 0xffff0000 ;
+ unsigned int node_slot_mask=dma_tcp->node_slot_mask ;
+ rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+ unsigned int slot = (src_key >> 4) & node_slot_mask ;
+ unsigned int pad_head = src_key & 0x0f ;
+ struct sk_buff * candidate_skb=get_rcv_skb_for_conn(rcvdemux,slot,conn_id) ;
+ TRACEN(k_t_detail,
+ "(>) request=%p slot=%08x pad_head=0x%08x payload_bytes=0x%02x Put_Offset=0x%08x\n",
+ request,slot,pad_head,payload_bytes,Put_Offset);
+ if( candidate_skb)
+ {
+ adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ;
+ if(askb->conn_id != conn_id || askb->packet_count != packet_count)
+ {
+ TRACEN(k_t_error,"(E) askb mismatch, slot=%08x askb->conn_id=%04x conn_id=%04x askb->packet_count=%04x packet_count=%04x askb->packets_to_go=%04x",
+ slot,askb->conn_id,conn_id,askb->packet_count,packet_count,askb->packets_to_go) ;
+ dev_kfree_skb(candidate_skb) ;
+ candidate_skb = NULL ;
+ }
+ }
+ if( NULL == candidate_skb)
+ {
+ instrument_flow(dma_tcp,k_receive_eager) ;
+ candidate_skb=alloc_skb(packet_count*k_injection_packet_size+2*k_torus_skb_alignment+k_injection_packet_size,GFP_ATOMIC) ; /* TODO: refine the size */
+ if( candidate_skb)
+ {
+ adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ;
+ askb->conn_id = conn_id ;
+ askb->packet_count = packet_count ;
+ askb->packets_to_go = packet_count ;
+ askb->framestart_offset = 0 ;
+ askb->prev_offset = -65536 ;
+ skb_reserve(candidate_skb, (k_torus_skb_alignment - ((unsigned int)(candidate_skb->data)) % k_torus_skb_alignment));
+ skb_put(candidate_skb,packet_count*k_injection_packet_size) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"skbuff allocation failed packet_count=%d slot=0x%08x conn_id=0x%08x",packet_count,slot,conn_id) ;
+ }
+ set_rcv_skb_for_conn(rcvdemux,slot,conn_id,candidate_skb) ;
+ }
+ if( candidate_skb)
+ {
+ unsigned char * end_of_frame=candidate_skb->tail ;
+ unsigned char * target = end_of_frame+offset_in_frame ;
+ int cand_start_offset = offset_in_frame + pad_head ;
+ TRACEN(k_t_detail,"candidate_skb skb=%p head=%p data=%p tail=%p end=%p offset_in_frame=0x%08x target=%p cand_start_offset=0x%08x",
+ candidate_skb,candidate_skb->head,candidate_skb->data,candidate_skb->tail,candidate_skb->end,offset_in_frame,target,cand_start_offset) ;
+ if( target < candidate_skb->head)
+ {
+ TRACEN(k_t_error,"data offset outside skb, dropping packet") ;
+ }
+ else
+ {
+ adaptive_skb_cb_t * askb=(adaptive_skb_cb_t *)(candidate_skb->cb) ;
+ int new_packets_to_go=askb->packets_to_go - 1 ;
+ int prev_offset = askb->prev_offset ;
+#if defined(USE_ADAPTIVE_ROUTING)
+/* Statistics, count how often a packet came out-of-order */
+ if( offset_in_frame < prev_offset)
+ {
+ instrument_flow(dma_tcp,k_reordered) ;
+ }
+ askb->prev_offset = offset_in_frame ;
+#endif
+ if( cand_start_offset < askb->framestart_offset )
+ {
+ askb->framestart_offset=cand_start_offset ;
+ }
+
+ TRACEN(k_t_detail,"memcpy(%p,%p,0x%08x) new_packets_to_go=%d",
+ target,request,payload_bytes,new_packets_to_go) ;
+ if( payload_bytes == k_injection_packet_size)
+ {
+ /* doublehummer memcpy optimisation for 'full' packet */
+ /* TODO: rewrite with 'memcpy' or a copy through integer regs, to avoid using FP now this is 'rare' */
+ torus_frame_payload_memcpy((torus_frame_payload *)target,(torus_frame_payload *)request) ;
+ }
+ else
+ {
+ memcpy(target,request,payload_bytes) ;
+ }
+ if( new_packets_to_go <= 0)
+ {
+ analyse_tcp_flags(dma_tcp, candidate_skb) ;
+ skb_reserve(candidate_skb,packet_count*k_injection_packet_size+askb->framestart_offset);
+ dumpframe(candidate_skb->data,candidate_skb->len,"Proposed frame") ;
+ deliver_from_slot(dma_tcp,slot,conn_id,candidate_skb) ;
+ set_rcv_skb_for_conn(rcvdemux,slot,conn_id,NULL) ;
+ }
+ else
+ {
+ askb->packets_to_go = new_packets_to_go ;
+ }
+ }
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) No memory for skb, dropping packet") ;
+ }
+
+ }
+
+/* Reception-FIFO actor callback for 'inline frame data' packets on the
+ * adaptive path.  Unpacks the software argument and put-offset from the
+ * packet header and forwards the payload to issueInlineFrameDataAdaptive();
+ * recv_func_parm carries the dma_tcp_t driver state.  Always returns 0
+ * (packet consumed). */
+static int issueInlineFrameDataAdaptiveActor(DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ )
+ {
+ unsigned int SW_Arg=packet_ptr->SW_Arg ;
+ int Put_Offset=packet_ptr->Put_Offset ;
+/* enable_kernel_fp() ; // TODO: don't think this is needed nowadays */
+
+ issueInlineFrameDataAdaptive(
+ (dma_tcp_t *) recv_func_parm,
+ (void *) payload_ptr,
+ SW_Arg,
+ payload_bytes,
+ Put_Offset
+ ) ;
+ return 0 ;
+ }
+#endif
+
+#if defined(AUDIT_FRAME_HEADER)
+
+/* Copy of the frame header first seen for each reception counter, compared
+ * again at delivery/flush time to detect the header changing under DMA. */
+frame_header_t all_headers_in_counters[DMA_NUM_COUNTERS_PER_GROUP] ;
+#endif
+/* iphdr tot_len recorded per reception counter, kept for diagnostics if the torus hangs */
+unsigned int tot_len_for_rcv[DMA_NUM_COUNTERS_PER_GROUP] ;
+
+/* Builds a memory-FIFO torus injection descriptor for a 'propose'/'accept'
+ * control frame of 'propose_length' bytes starting at 'address', destined for
+ * node (x,y,z) with receive-side actor selected by 'proto'.  The hardware does
+ * not use Put_Offset for memory-FIFO packets, so it is overloaded here to
+ * carry sideband data to the receive actor: conn_id in the top 7 bits,
+ * 'tag' in bits 16..24, and the negated frame length (masked to 0xfff0) in
+ * the low 16 bits. */
+static inline void create_dma_descriptor_propose_accept(dma_tcp_t *dma_tcp,
+ void * address,
+ unsigned int length,
+ unsigned int x, unsigned int y, unsigned int z,
+ unsigned int proto,
+ unsigned int SW_Arg,
+ unsigned int conn_id,
+ unsigned int tag,
+ DMA_InjDescriptor_t *desc,
+ unsigned int propose_length
+ )
+{
+ dma_addr_t dataAddr ;
+ int ret1 ;
+ int PutOffset = (conn_id << 25) | (tag << 16) | ((-length) & 0xfff0) ;
+ TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d proto=%d desc=%p",address,length,x,y,z,proto,desc);
+ dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+ ret1 = DMA_TorusMemFifoDescriptor( desc,
+ x, y, z,
+ k_ReceptionFifoGroup, /* recv fifo grp id */
+ 0, /* hints */
+ virtual_channel(dma_tcp,k_VC_anyway), /* vc - adaptive */
+ SW_Arg, /* softw arg */
+ proto, /* function id */
+ k_InjectionCounterGroup, /* inj cntr group id */
+ k_injCounterId, /* inj counter id */
+ dataAddr, /* send address */
+ propose_length /* proposal length */
+ );
+ if(ret1 != 0 )
+ {
+ TRACEN(k_t_error,"(E) ret1=%d",ret1) ;
+ }
+
+ DMA_DescriptorSetPutOffset(desc,PutOffset) ; /* For 'memory FIFO packets', the put offset has no hardware use. Set it to pass required data to receive actor */
+
+ TRACEN(k_t_general , "(<) ret1=%d",ret1);
+
+}
+
+/* Torus coordinates are encoded in the last three octets of the MAC
+ * addresses: bytes [3],[4],[5] of h_source / h_dest carry x, y, z.
+ * These accessors extract them. */
+static inline unsigned int ethhdr_src_x(struct ethhdr * eth)
+{
+ return eth->h_source[3] ;
+}
+static inline unsigned int ethhdr_src_y(struct ethhdr * eth)
+{
+ return eth->h_source[4] ;
+}
+static inline unsigned int ethhdr_src_z(struct ethhdr * eth)
+{
+ return eth->h_source[5] ;
+}
+
+static inline unsigned int ethhdr_dest_x(struct ethhdr * eth)
+{
+ return eth->h_dest[3] ;
+}
+static inline unsigned int ethhdr_dest_y(struct ethhdr * eth)
+{
+ return eth->h_dest[4] ;
+}
+static inline unsigned int ethhdr_dest_z(struct ethhdr * eth)
+{
+ return eth->h_dest[5] ;
+}
+
+#if defined(USE_SKB_TO_SKB)
+/* Allocates a free hardware reception counter, scanning circularly from the
+ * last allocation point.  Returns the counter index, or -1 if none are free.
+ * Note: qtyFreeRecCounters is decremented BEFORE the scan; if the scan fails
+ * despite a nonzero count (bookkeeping inconsistency) an error is traced and
+ * the count stays decremented. */
+static int get_reception_counter(dma_tcp_t * dma_tcp)
+{
+ unsigned int counters_available = dma_tcp->qtyFreeRecCounters ;
+ if( counters_available > 0)
+ {
+ int cx ;
+ int scanRecCounter=dma_tcp->scanRecCounter ;
+ dma_tcp->qtyFreeRecCounters=counters_available-1 ;
+ for(cx=0;cx<DMA_NUM_COUNTERS_PER_GROUP;cx+=1)
+ {
+ /* DMA_NUM_COUNTERS_PER_GROUP is a power of two, so masking wraps the scan */
+ int cxx=(scanRecCounter+cx) & (DMA_NUM_COUNTERS_PER_GROUP-1) ;
+ if(0 == dma_tcp->recCntrInUse[cxx])
+ {
+ dma_tcp->scanRecCounter=cxx+1 ;
+ dma_tcp->recCntrInUse[cxx] = 1 ;
+ return cxx ;
+ }
+ }
+ TRACEN(k_t_error,"(E) Should have been %d counters available",counters_available) ;
+ }
+ return -1 ; /* No reception counters available */
+}
+
+/* Flags describing a proposed skb's reception-counter state */
+enum {
+ k_PSKB_noRecCounter = 0x01 ,
+ k_PSKB_freedRecCounter = 0x02
+};
+/* Control block stored in skb->cb for a 'propose' that is queued or parked
+ * waiting for a reception counter; carries everything needed to resume
+ * reception later (see handle_empty_recCounter_reload). */
+typedef struct
+{
+ unsigned int src_key ;
+ unsigned int slot ;
+ unsigned int conn_id ;
+ unsigned short tot_len ;
+ unsigned char pad_head ;
+} propose_skb_cb ;
+
+/* Frame injection control, may live in skb->cb . */
+/* 'desc' describes the 'non-fragmented' initial part of the skb data; code where the ficb is used will */
+/* handle what has to happen to get the 'fragmented' part of the skb sent out */
+/* Transport classes passed to bgp_dma_tcp_s_and_f_frames_prepared();
+ * presumably k_first_class frames get priority over k_cattle_class ones —
+ * TODO(review): confirm against the queueing code. */
+enum {
+ k_cattle_class,
+ k_first_class
+};
+
+static int bgp_dma_tcp_s_and_f_frames_prepared(
+ dma_tcp_t *dma_tcp,
+ struct sk_buff *skb,
+ unsigned int queue_at_head,
+ unsigned int transport_class
+ ) ;
+
+/* Distinguishes a genuine 'propose' frame from an 'accept' frame sent through
+ * the propose channel.  Convention (see the caller issueProp): an accept is
+ * marked by setting the Ethernet SOURCE coordinates to the receiving node's
+ * own coordinates, so a match against our location means "not a propose".
+ * Returns 1 for a propose, 0 for an embedded accept. */
+static int isProp(dma_tcp_t * dma_tcp,struct ethhdr *eth,struct iphdr *iph)
+{
+ int h_source_x=eth->h_source[3] ;
+ int h_source_y=eth->h_source[4] ;
+ int h_source_z=eth->h_source[5] ;
+ int my_x=dma_tcp->location.coordinate[0] ;
+ int my_y=dma_tcp->location.coordinate[1] ;
+ int my_z=dma_tcp->location.coordinate[2] ;
+
+ if( h_source_x == my_x && h_source_y == my_y && h_source_z == my_z )
+ {
+ TRACEN(k_t_general,"non-propose from (%d,%d,%d)",eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ;
+ return 0 ;
+ }
+ return 1 ;
+}
+
+/* NOTE(review): duplicate forward declaration — an identical declaration of
+ * bgp_dma_tcp_s_and_f_frames_prepared appears earlier in this file; one of
+ * the two is redundant and could be removed. */
+static int bgp_dma_tcp_s_and_f_frames_prepared(
+ dma_tcp_t *dma_tcp,
+ struct sk_buff *skb,
+ unsigned int queue_at_head,
+ unsigned int transport_class
+ ) ;
+
+/* Payload layout of an 'accept' control frame: a copy of the proposer's IP
+ * header (for auditing), the connection id, and the reception counter the
+ * sender should direct-put the frame data into. */
+struct accepthdr {
+ struct iphdr iph ;
+ unsigned int conn_id ;
+ int reception_counter ;
+};
+
+static inline void create_dma_descriptor_direct_put_offset(dma_tcp_t *dma_tcp,
+ unsigned int x, unsigned int y, unsigned int z,
+ int injection_counter,
+ int reception_counter,
+ dma_addr_t dataAddr,
+ int msglen,
+ DMA_InjDescriptor_t *desc,
+ unsigned int offset
+ ) ;
+
+/* Traces the interesting fields of an IP header (diagnostic aid for the
+ * accept-time audit). */
+static void display_iphdr(struct iphdr *iph)
+{
+ TRACEN(k_t_request,"iphdr tot_len=0x%04x saddr=0x%08x daddr=0x%08x",iph->tot_len,iph->saddr,iph->daddr) ;
+}
+
+/* Recomputes an skb's total data length from first principles: linear head
+ * length plus the size of every page fragment.  Used by the audit path to
+ * cross-check skb->len-style bookkeeping. */
+static unsigned int counted_length(struct sk_buff *skb)
+{
+ unsigned int rc=skb_headlen(skb) ;
+ int f ;
+ int nfrags = skb_shinfo(skb)->nr_frags ;
+ struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[0] ;
+ for(f=0; f<nfrags; f+=1)
+ {
+ rc += frag[f].size ;
+ }
+ return rc ;
+
+}
+
+/* Consistency check run when an 'accept' arrives for an skb we proposed:
+ * the tot_len remembered at propose time must be nonzero, within the MTU,
+ * equal to the tot_len echoed back in the accept's IP header copy, and
+ * (plus the Ethernet header) equal to the skb's counted length.  Returns
+ * 1 (and dumps diagnostics) on inconsistency, 0 when the skb is sane. */
+static int audit_skb_at_accept(dma_tcp_t * dma_tcp,struct sk_buff *skb, unsigned int totlen_at_propose, struct iphdr *iph_at_rcv)
+{
+ unsigned int ctlen = counted_length(skb) ;
+ if( totlen_at_propose == 0 || totlen_at_propose > dma_tcp->mtu || totlen_at_propose != iph_at_rcv->tot_len || totlen_at_propose +sizeof(struct ethhdr) != ctlen)
+ {
+ TRACEN(k_t_error,"(E) skb=%p inconsistent, totlen_at_propose=0x%04x iph_at_rcv->tot_len=0x%04x skb->data_len=0x%04x counted_length(skb)=0x%04x",
+ skb, totlen_at_propose, iph_at_rcv->tot_len, skb->data_len, ctlen
+ ) ;
+ display_skb_structure(skb) ;
+ display_iphdr(iph_at_rcv) ;
+ instrument_flow(dma_tcp,k_accept_audit_fail) ;
+ return 1 ;
+ }
+ return 0 ;
+}
+/* Handles an incoming 'accept' for a frame we earlier proposed: looks up the
+ * parked tx skb by (slot, conn_id), DMA-maps its linear head (padded back to
+ * 16-byte alignment via pad_head), builds a direct-put descriptor targeting
+ * the reception counter the accepting node allocated, and queues the frame
+ * for injection.  If AUDIT_HEADLEN is enabled and the audit fails the skb is
+ * dropped — knowingly leaking the remote reception counter. */
+void issue_accept(dma_tcp_t * dma_tcp,struct accepthdr * accepth, unsigned int src_key )
+{
+ unsigned int conn_id=accepth->conn_id ;
+ int reception_counter=accepth->reception_counter ;
+ unsigned int node_slot_mask=dma_tcp->node_slot_mask ;
+ unsigned int slot = (src_key >> 4) & node_slot_mask ;
+ struct sk_buff *skb=get_tx_skb(&dma_tcp->tx_mux,slot,conn_id) ;
+ TRACEN(k_t_general,"src_key=0x%08x conn_id=0x%08x reception_counter=0x%08x",src_key,conn_id,reception_counter) ;
+ instrument_flow(dma_tcp,k_act_accept_rpc) ;
+ if( skb)
+ {
+ struct ethhdr* eth = (struct ethhdr*)(skb->data) ;
+ unsigned int x=ethhdr_dest_x(eth) ;
+ unsigned int y=ethhdr_dest_y(eth) ;
+ unsigned int z=ethhdr_dest_z(eth) ;
+ frame_injection_cb *ficb = (frame_injection_cb *) skb->cb ;
+ unsigned int payload_length = skb_headlen(skb) ;
+ unsigned int payload_address = (unsigned int)(skb->data) ;
+ /* low 4 address bits give the padding needed to reach 16-byte alignment */
+ unsigned int pad_head = payload_address & 0x0f ;
+ unsigned int aligned_payload_length = payload_length + pad_head ;
+ dma_addr_t dataAddr = dma_map_single(NULL, skb->data-pad_head, aligned_payload_length, DMA_TO_DEVICE);
+
+ set_tx_skb(&dma_tcp->tx_mux,slot,conn_id,NULL) ;
+ TRACEN(k_t_general,"Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb=%p x=%d y=%d z=%d msglen=0x%04x",
+ slot,conn_id,reception_counter,skb, x,y,z,payload_length+pad_head) ;
+ if(TRACING(k_t_sgdiag))
+ {
+ TRACEN(k_t_sgdiag,"Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb=%p x=%d y=%d z=%d msglen=0x%04x",
+ slot,conn_id,reception_counter,skb, x,y,z,payload_length+pad_head) ;
+ diag_skb_structure(skb) ;
+ }
+#if defined(AUDIT_HEADLEN)
+ {
+ int rca = audit_skb_at_accept(dma_tcp,skb,ficb->tot_len,&accepth->iph) ;
+ if( rca)
+ {
+ TRACEN(k_t_error,"(!!!) dropping skb, will cause (x=%d y=%d z=%d) counter 0x%02x to leak", x,y,z,reception_counter) ;
+ dev_kfree_skb(skb) ;
+ return ;
+ }
+ }
+#endif
+ {
+ /* With k_abbreviate_headlen, eth->h_source[0] bytes were already carried
+ * in the propose frame; skip them in the DMA and receive at that offset */
+ int transfer_length = k_abbreviate_headlen ? (payload_length+pad_head-eth->h_source[0]) : (payload_length+pad_head) ;
+ dma_addr_t transfer_address = k_abbreviate_headlen ? (dataAddr+eth->h_source[0]) : dataAddr ;
+ unsigned int receive_offset = k_abbreviate_headlen ? eth->h_source[0] : 0 ;
+ if( 0 != transfer_length)
+ {
+ create_dma_descriptor_direct_put_offset(
+ dma_tcp,x, y, z,k_injCounterId,reception_counter,transfer_address,transfer_length,&ficb->desc,receive_offset
+ ) ;
+ }
+ else
+ {
+ TRACEN(k_t_general,"(I) head length is zero") ;
+ /* Set up a descriptor for a non-zero length, then set its length to zero so that code later on can pick up the special case */
+ create_dma_descriptor_direct_put_offset(
+ dma_tcp,x, y, z,k_injCounterId,reception_counter,transfer_address,1,&ficb->desc,receive_offset
+ ) ;
+ ficb->desc.msg_length = 0 ;
+ instrument_flow(dma_tcp,k_headlength_zero) ;
+ }
+ }
+ ficb->free_when_done=1 ;
+ bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp, skb, 0, k_first_class) ;
+
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) Cop from slot=0x%08x conn_id=0x%04x reception_counter=0x%02x skb is null",
+ slot,conn_id,reception_counter ) ;
+ }
+}
+
+/* Flow-control policy: decide whether a new propose from a source that already
+ * has 'proposals_active' frames in flight should be parked.  With an explicit
+ * per-source tuning limit, park when over that limit; otherwise park when the
+ * source has more than one active proposal and proposals_active^2 exceeds the
+ * number of free reception counters (a soft fair-share heuristic). */
+static int should_park(dma_tcp_t * dma_tcp,unsigned int proposals_active, unsigned int x0, unsigned int y0, unsigned int z0)
+{
+ unsigned int free_counters = dma_tcp->qtyFreeRecCounters ;
+ unsigned int tuning_counters_per_source = dma_tcp->tuning_counters_per_source ;
+/* unsigned int reported_transmission_fifo = report_transmission_fifo(dma_tcp,x0,y0,z0) ; */
+ return ( tuning_counters_per_source > 0 )
+ ? (proposals_active > tuning_counters_per_source )
+ : ((proposals_active > 1) && (proposals_active * proposals_active > free_counters )) ;
+}
+
+/* Diagnostic aid: fills the first 'size' bytes of a fresh skb's data area with
+ * the pattern 0x11 so undelivered regions are recognisable later.  Skips (and
+ * traces an error) if the stamp would run past skb->end. */
+static void stamp_skb(struct sk_buff *skb, unsigned int size )
+{
+ if( skb->data + size <= skb->end)
+ {
+ memset(skb->data,0x11,size) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) Stamp for 0x%08x bytes out of range, skb=%p head=%p data=%p tail=%p end=%p, skipped",
+ size,skb,skb->head,skb->data,skb->tail,skb->end) ;
+ }
+}
+
+/* Tunable: nonzero means allocate only a small skb at propose time and defer
+ * the full-size allocation until a reception counter becomes available.
+ * Always 0 when the k_allow_defer_skb_for_counter feature is compiled out. */
+static inline int defer_skb_for_counter(const dma_tcp_t * dma_tcp)
+{
+ return k_allow_defer_skb_for_counter ? dma_tcp->tuning_defer_skb_until_counter : 0 ;
+}
+static void receive_skb_using_counter(dma_tcp_t *dma_tcp,struct sk_buff *skb_next, unsigned int counter_index,
+ unsigned int pad_head, unsigned int slot, unsigned int conn_id,
+ unsigned int x, unsigned int y,unsigned int z,
+ unsigned int tot_len,
+ unsigned int src_key) ;
+/* Queues a counter-starved propose skb on the per-transmission-fifo balancer
+ * queue selected by the sending node's (x0,y0,z0) coordinates.  An
+ * out-of-range fifo report is traced and falls back to queue 0 rather than
+ * dropping the skb. */
+static void pending_rcv_skb_queue(dma_tcp_t *dma_tcp, struct sk_buff * skb, unsigned int x0, unsigned int y0, unsigned int z0 )
+{
+/* if( 1 == dma_tcp->tuning_select_fifo_algorithm) */
+/* { */
+/* skb_queue_tail(&dma_tcp->balancer.b[k_pending_rcv_skb_classes-1].pending_rcv_skbs,skb) ; */
+/* } */
+/* else */
+/* { */
+ unsigned int reported_fifo=report_transmission_fifo(dma_tcp,x0,y0,z0) ;
+ TRACEN(k_t_general,"skb=%p would come from fifo=%d on node [%d,%d,%d]",skb,reported_fifo,x0,y0,z0) ;
+ if( reported_fifo < k_pending_rcv_skb_classes)
+ {
+ skb_queue_tail(&dma_tcp->balancer.b[reported_fifo].pending_rcv_skbs,skb) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"(!!!) skb=%p would come from fifo=%d on node [%d,%d,%d] (out of range)",skb,reported_fifo,x0,y0,z0) ;
+ skb_queue_tail(&dma_tcp->balancer.b[0].pending_rcv_skbs,skb) ;
+ }
+/* } */
+}
+
+/* Balancer ordering key for one direction: the number of outstanding counters
+ * if its pending queue is non-empty, or INT_MAX (0x7fffffff) for an empty
+ * queue so it is never selected by the minimum-search in the dequeue. */
+static inline int over_quota(bgp_dma_balancer_direction *b)
+{
+ int ql = skb_queue_len(&b->pending_rcv_skbs) ;
+ return ql ? b->outstanding_counters : 0x7fffffff ;
+}
+/* Takes the next counter-starved skb from the balancer class with the fewest
+ * outstanding counters (among non-empty queues).  Returns NULL when every
+ * queue is empty (skb_dequeue of an empty list yields NULL). */
+static struct sk_buff* pending_rcv_skb_dequeue(dma_tcp_t *dma_tcp)
+{
+ unsigned int q=0 ;
+ int qq=over_quota(dma_tcp->balancer.b+0) ;
+ int x ;
+ for(x=1;x<k_pending_rcv_skb_classes;x+=1)
+ {
+ int qp=over_quota(dma_tcp->balancer.b+x) ;
+ if( qp < qq)
+ {
+ qq=qp ;
+ q=x ;
+ }
+ }
+ return skb_dequeue(&dma_tcp->balancer.b[q].pending_rcv_skbs) ;
+}
+
+/* Handles an incoming 'propose' control packet (or an 'accept' tunnelled
+ * through the propose channel — see isProp).  For a genuine propose: allocate
+ * an skb for the whole frame, then either (a) grab a reception counter and
+ * start the direct-put reception immediately, (b) queue on the balancer when
+ * no counter is free, or (c) park on the per-slot flow-control queue when the
+ * source is over its quota.  src_key encodes the sender's slot (bits 4..) and
+ * the 16-byte alignment padding of its data (low 4 bits); Put_Offset carries
+ * conn_id in its top 7 bits (see create_dma_descriptor_propose_accept).
+ * NOTE(review): the three allocation paths are near-duplicates and could be
+ * factored into a helper. */
+static void issueProp(dma_tcp_t * dma_tcp,
+ void * request ,
+ unsigned int src_key ,
+ int payload_bytes,
+ int Put_Offset
+ )
+ {
+ unsigned int conn_id = ((unsigned int) Put_Offset) >> 25 ;
+ unsigned int node_slot_mask=dma_tcp->node_slot_mask ;
+ unsigned int slot = (src_key >> 4) & node_slot_mask ;
+ unsigned int pad_head = src_key & 0x0f ;
+
+ struct ethhdr *eth = (struct ethhdr *)(request+pad_head) ;
+ unsigned int eth_proto = eth->h_proto ;
+
+ struct iphdr *iph = (struct iphdr *) (eth+1) ;
+ unsigned int tot_len=iph->tot_len ;
+ if( isProp(dma_tcp,eth,iph))
+ {
+ unsigned int x=ethhdr_src_x(eth) ;
+ unsigned int y=ethhdr_src_y(eth) ;
+ unsigned int z=ethhdr_src_z(eth) ;
+ rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+ unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ;
+ instrument_flow(dma_tcp,k_act_propose_rpc) ;
+ set_proposals_active(rcvdemux,slot,proposals_active+1) ;
+ /* If we're flow controlling by counters, we have a choice here. */
+ /* We can either get on with it, or park it for later when a previously-started frame completes */
+ if( 0 == k_counter_flow_control || ! should_park(dma_tcp,proposals_active,x,y,z) )
+ {
+ int reception_counter=get_reception_counter(dma_tcp) ;
+ TRACEN(k_t_general|k_t_sgdiag,"Prop from slot=0x%08x conn_id=0x%04x eth_proto=0x%04x pad_head=0x%02x tot_len=0x%04x x=0x%02x y=0x%02x z=0x%02x msglen=0x%04x payload_bytes=0x%02x", slot,conn_id,eth_proto,pad_head,tot_len, x, y, z,tot_len+pad_head, payload_bytes) ;
+
+ /* Now we need an 'skbuff' and a reception counter. Reception counters might be scarce */
+ if( reception_counter != -1 )
+ {
+ /* Counter available: allocate the full-size skb and start reception now */
+ unsigned int allocation_size=tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment ;/* TODO: refine the size */
+ struct sk_buff *skb = alloc_skb((allocation_size > 256) ? allocation_size : 256, GFP_ATOMIC) ; /* TODO: refine the size */
+ if( skb)
+ {
+ if(k_scattergather_diagnostic) stamp_skb(skb,tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+ skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head);
+ /* Bring in the frame header for diagnosis later ... */
+ memcpy(skb->data-pad_head,request,payload_bytes) ;
+ skb_put(skb,tot_len+sizeof(struct ethhdr)) ;
+ if( k_scattergather_diagnostic) display_skb_structure(skb) ;
+ {
+ receive_skb_using_counter(dma_tcp,skb,reception_counter,pad_head,slot,conn_id,x,y,z,tot_len,src_key) ;
+ }
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) No memory available for skbuff") ;
+ }
+ }
+ else
+ {
+ /* No reception counter free: stash the propose on the balancer queue */
+ unsigned int allocation_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes+2*k_torus_skb_alignment) : (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+ unsigned int put_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes-pad_head) : (tot_len+sizeof(struct ethhdr)) ;
+ /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */
+ struct sk_buff *skb = alloc_skb((allocation_size > 256) ? allocation_size : 256, GFP_ATOMIC) ; /* TODO: refine the size */
+ TRACEN(k_t_general,"allocation_size=0x%04x put_size=0x%04x skb=%p",allocation_size,put_size,skb) ;
+ instrument_flow(dma_tcp, k_no_reception_counter) ;
+ if( skb)
+ {
+ if(k_scattergather_diagnostic) stamp_skb(skb,allocation_size) ;
+ skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head);
+ /* Bring in the frame header for diagnosis later ... */
+ memcpy(skb->data-pad_head,request,payload_bytes) ;
+ skb_put(skb,put_size) ;
+ if( k_scattergather_diagnostic) display_skb_structure(skb) ;
+ {
+ propose_skb_cb * pskbcb = (propose_skb_cb *)skb->cb ;
+ pskbcb->src_key=src_key ;
+ pskbcb->slot = slot ;
+ pskbcb->conn_id = conn_id ;
+ pskbcb->tot_len = tot_len ;
+ pskbcb->pad_head = pad_head ;
+ }
+ instrument_flow(dma_tcp,k_defer_accept_rpc_counters) ;
+ pending_rcv_skb_queue(dma_tcp,skb,x,y,z) ;
+ TRACEN(k_t_flowcontrol|k_t_general,"No reception counters (%d,%d,%d) skb=%p src_key=0x%08x slot=0x%08x conn_id=0x%08x tot_len=0x%04x pad_head=0x%02x",x,y,z,skb,src_key,slot,conn_id,tot_len,pad_head) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) No memory available for skbuff") ;
+ }
+ }
+ }
+ else
+ {
+ /* Park the 'propose' until a previous frame from this node completes */
+
+ unsigned int allocation_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes+2*k_torus_skb_alignment) : (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+ unsigned int put_size = defer_skb_for_counter(dma_tcp) ? (payload_bytes-pad_head) : (tot_len+sizeof(struct ethhdr)) ;
+ /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */
+ struct sk_buff *skb = alloc_skb(allocation_size, GFP_ATOMIC) ; /* TODO: refine the size */
+ TRACEN(k_t_general,"allocation_size=0x%04x put_size=0x%04x skb=%p",allocation_size,put_size,skb) ;
+ instrument_flow(dma_tcp, k_parked) ;
+ if( skb)
+ {
+ if(k_scattergather_diagnostic) stamp_skb(skb,allocation_size) ;
+ skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head);
+ /* Bring in the frame header for diagnosis later ... */
+ memcpy(skb->data-pad_head,request,payload_bytes) ;
+ skb_put(skb,put_size) ;
+ if( k_scattergather_diagnostic) display_skb_structure(skb) ;
+ {
+ propose_skb_cb * pskbcb = (propose_skb_cb *)skb->cb ;
+ pskbcb->src_key=src_key ;
+ pskbcb->slot = slot ;
+ pskbcb->conn_id = conn_id ;
+ pskbcb->tot_len = tot_len ;
+ pskbcb->pad_head = pad_head ;
+ }
+ instrument_flow(dma_tcp,k_defer_accept_rpc_nodeflow) ;
+ enq_pending_flow(&dma_tcp->rcvdemux,slot,skb) ;
+ TRACEN(k_t_general,"Flow control (%d,%d,%d) skb=%p src_key=0x%08x slot=0x%08x conn_id=0x%08x tot_len=0x%04x pad_head=0x%02x proposals_active=%d qtyFreeRecCounters=%d",x,y,z,skb,src_key,slot,conn_id,tot_len,pad_head,proposals_active,dma_tcp->qtyFreeRecCounters) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) No memory available for skbuff") ;
+ }
+ }
+ }
+ else
+ {
+ /* an 'accept' packet sent as a modified 'propose' ... */
+ struct accepthdr * accepth=(struct accepthdr *)(eth+1) ;
+ TRACEN(k_t_general,"'accept' src_key=0x%08x",src_key) ;
+ issue_accept(dma_tcp,accepth,src_key) ;
+ }
+ }
+
+/* Reception-FIFO actor callback for 'propose' packets: unpacks the software
+ * argument and put-offset from the packet header and hands the payload to
+ * issueProp().  recv_func_parm carries the dma_tcp_t state; always returns 0
+ * (packet consumed). */
+static int issuePropActor(DMA_RecFifo_t *f_ptr,
+ DMA_PacketHeader_t *packet_ptr,
+ void *recv_func_parm,
+ char *payload_ptr,
+ int payload_bytes
+ )
+ {
+ unsigned int SW_Arg=packet_ptr->SW_Arg ;
+ int Put_Offset=packet_ptr->Put_Offset ;
+
+ issueProp(
+ (dma_tcp_t *) recv_func_parm,
+ (void *) payload_ptr,
+ SW_Arg,
+ payload_bytes,
+ Put_Offset
+ ) ;
+ return 0 ;
+ }
+/* Control block (candidate for skb->cb) recording which reception counter an
+ * accept concerns and the destination node's coordinates. */
+typedef struct
+{
+ unsigned int reception_counter ;
+ unsigned char x, y, z ;
+} accept_skb_cb ;
+
+/* Builds a torus direct-put injection descriptor: 'msglen' bytes at DMA
+ * address 'dataAddr' are written by the hardware into the remote buffer
+ * registered under 'reception_counter' (group 0) at byte 'offset', on node
+ * (x,y,z).  The DMA_TorusDirectPutDescriptor return code is traced but not
+ * otherwise acted on. */
+static inline void create_dma_descriptor_direct_put_offset(dma_tcp_t *dma_tcp,
+ unsigned int x, unsigned int y, unsigned int z,
+ int injection_counter,
+ int reception_counter,
+ dma_addr_t dataAddr,
+ int msglen,
+ DMA_InjDescriptor_t *desc,
+ unsigned int offset
+ )
+{
+ int ret1 __attribute((unused));
+ TRACEN(k_t_general|k_t_sgdiag , "(>) injecting x=%d y=%d z=%d injection_counter=0x%02x reception_counter=0x%02x dataAddr=0x%08llx msglen=0x%08x desc=%p offset=0x%04x",
+ x,y,z,injection_counter,reception_counter,dataAddr,msglen,desc,offset);
+ ret1 = DMA_TorusDirectPutDescriptor( desc,
+ x, y, z,
+ 0, /* hints */
+ virtual_channel(dma_tcp,k_VC_anyway), /* vc - adaptive */
+ k_InjectionCounterGroup, /* inj cntr group id */
+ injection_counter, /* inj counter id */
+ dataAddr, /* send offset */
+ 0, /* rec ctr grp */
+ reception_counter,
+ offset, /* reception offset */
+ msglen /* message length */
+ );
+ TRACEN(k_t_general , "(<) ret1=%d",ret1);
+
+}
+
+#endif
+
+/* Binds an allocated hardware reception counter to 'skb_next': DMA-maps the
+ * skb's data area (re-padded to 16-byte alignment via pad_head), programs the
+ * counter's value/base/max to cover the expected direct-put region, records
+ * the per-counter bookkeeping (slot, conn_id, skb, timestamp), and injects a
+ * 'reverse propose' (accept) frame back to node (x,y,z) telling the sender
+ * which counter to direct-put the remainder of the frame into.  The accept
+ * frame is overlaid beyond the already-received 'propose_len' bytes so the
+ * incoming DMA overwrites it.
+ * Fix: the accept's IP-header copy previously used sizeof(iph) — the size of
+ * a POINTER (4 bytes on 32-bit PowerPC) — so only the first 4 bytes of the
+ * iphdr were copied and accepth->iph.saddr/daddr reached the accept-side
+ * audit (audit_skb_at_accept / display_iphdr) as uninitialised garbage.
+ * sizeof(*iph) copies the whole header as intended. */
+static void receive_skb_using_counter(dma_tcp_t *dma_tcp,struct sk_buff *skb_next, unsigned int counter_index,
+ unsigned int pad_head, unsigned int slot, unsigned int conn_id,
+ unsigned int x, unsigned int y,unsigned int z,
+ unsigned int tot_len,
+ unsigned int src_key)
+{
+ struct ethhdr* eth=(struct ethhdr *)(skb_next->data) ;
+ dma_addr_t dataAddr = dma_map_single(NULL, skb_next->data-pad_head, skb_next->len+pad_head, DMA_FROM_DEVICE);
+ frame_injection_cb * ficb = (frame_injection_cb *) skb_next->cb ;
+ unsigned int counter_base=dataAddr>>4 ;
+ unsigned int counter_max=((dataAddr+tot_len+pad_head+sizeof(struct ethhdr)) >> 4)+1 ;
+ unsigned int propose_len = eth->h_source[0] ;
+ unsigned int dma_count = k_abbreviate_headlen ? (skb_next->len+pad_head-propose_len) : (skb_next->len+pad_head) ;
+
+#if defined(AUDIT_FRAME_HEADER)
+ memcpy(all_headers_in_counters+counter_index,skb_next->data,sizeof(frame_header_t)) ;
+#endif
+
+ dma_tcp->balancer.b[report_transmission_fifo(dma_tcp,x,y,z)].outstanding_counters += 1 ;
+
+ dma_tcp->slot_for_rcv[counter_index]=slot ;
+ dma_tcp->conn_for_rcv[counter_index]=conn_id | 0x80 ; /* Mark it up as having been delayed */
+ TRACEN(k_t_general|k_t_scattergather|k_t_sgdiag,"Reception counter 0x%02x [%08x %08x %08x] assigned to (%d,%d,%d) conn_id=0x%08x skb=%p propose_len=0x%02x",
+ counter_index,dma_count,counter_base,counter_max,x,y,z,conn_id,skb_next,propose_len) ;
+ ficb->free_when_done = 0 ;
+
+ dma_tcp->rcv_skbs[counter_index] = skb_next ;
+ dma_tcp->rcv_timestamp[counter_index] = jiffies ;
+ {
+ /* Sanity-check that the skb really has room for the whole DMA region */
+ unsigned int proposed_dma_length = tot_len+pad_head+sizeof(struct ethhdr) ;
+ unsigned int available_skb_length = skb_next->end - (skb_next->data-pad_head) ;
+ if( proposed_dma_length > available_skb_length )
+ {
+ TRACEN(k_t_error,"(!!!) skb=%p not big enough, dma=0x%08x bytes, pad_head=0x%02x, skb(head=%p data=%p tail=%p end=%p)",
+ skb_next,proposed_dma_length,pad_head,skb_next->head,skb_next->data,skb_next->tail,skb_next->end
+ ) ;
+ show_stack(NULL,NULL) ;
+ }
+ }
+ DMA_CounterSetValueBaseMaxHw(dma_tcp->recCounterGroup.counter[counter_index].counter_hw_ptr,dma_count,dataAddr >> 4, ((dataAddr+tot_len+pad_head+sizeof(struct ethhdr)) >> 4)+1) ;
+ instrument_flow(dma_tcp,k_send_accept_rpc) ;
+ {
+ /* Push out a 'reverse propose' frame, adjust it so it overlays the area beyond the initial frame which will be replaced by the response DMA */
+ struct iphdr* iph = (struct iphdr*)(eth+1) ;
+ struct ethhdr* accept_eth0 = (struct ethhdr *)(iph+1) ;
+ struct ethhdr* accept_eth = (struct ethhdr *)(skb_next->data-pad_head+propose_len) ;
+ struct accepthdr * accepth=(struct accepthdr *)(accept_eth+1) ;
+ TRACEN(k_t_general,"accept_eth0=%p accepth=%p",accept_eth0,accept_eth) ;
+ tot_len_for_rcv[counter_index] = iph->tot_len ; // For diagnostics if the torus hangs
+ memcpy(accept_eth,eth,sizeof(struct ethhdr)) ;
+ memcpy(&accepth->iph,iph,sizeof(*iph)) ; /* copy the FULL iphdr; 'sizeof(iph)' copied only a pointer's worth */ // TODO: Diagnose the apparent 'scribble' at the sender, then take this away
+ accepth->conn_id=conn_id ;
+ accepth->reception_counter=counter_index ;
+ if( (unsigned int)(accepth+1) > (unsigned int)(skb_next->end))
+ {
+ TRACEN(k_t_error,"(!!!) skb=%p not big enough, (accepth+1)=%p, skb(head=%p data=%p tail=%p end=%p)",
+ skb_next,accepth+1,skb_next->head,skb_next->data,skb_next->tail,skb_next->end
+ ) ;
+ show_stack(NULL,NULL) ;
+
+ }
+ TRACEN(k_t_general,"accept_eth=%p accepth=%p src_key=0x%08x conn_id=0x%08x counter_index=0x%08x",accept_eth,accepth,src_key,conn_id,counter_index) ;
+ create_dma_descriptor_propose_accept(dma_tcp,
+ (void *)(accept_eth),
+ 48,
+ x,y, z,
+ dma_tcp->proto_transfer_propose,
+ (dma_tcp->src_key << 4),
+ conn_id,
+ 0,
+ &ficb->desc,
+ 48
+ ) ;
+ DMA_CounterSetEnableById(&dma_tcp->recCounterGroup,counter_index) ;
+ bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,skb_next,0, k_first_class) ;
+ }
+
+}
+
+/* A reception counter hit zero: the direct-put for its skb is complete.
+ * Decrements the source slot's active-proposal count, restores h_source[0]
+ * (which was borrowed as DMA sideband for the abbreviated head length),
+ * updates the balancer's outstanding-counter count for the source's fifo,
+ * and delivers the completed frame up the stack.  The counter itself is
+ * recycled separately by handle_empty_recCounter_reload. */
+static void handle_empty_recCounter_deliver(dma_tcp_t *dma_tcp, unsigned int counter_index)
+{
+ rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+ struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ;
+ unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ;
+ unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ;
+ set_proposals_active(rcvdemux,slot,proposals_active-1) ;
+ TRACEN(k_t_general|k_t_sgdiag,"counter_index=0x%02x skb=%p",counter_index,skb) ;
+ if( skb)
+ {
+#if defined(AUDIT_FRAME_HEADER)
+ if(memcmp(skb->data,((char *)(all_headers_in_counters+counter_index)),32))
+ {
+ TRACEN(k_t_request,"(!!!) header not as first seen") ;
+ dumpmem(skb->data,sizeof(frame_header_t),"header-now") ;
+ dumpmem(all_headers_in_counters+counter_index,sizeof(frame_header_t),"header-in-propose") ;
+
+ }
+#endif
+
+ {
+ struct ethhdr *eth=(struct ethhdr *)(skb->data) ;
+ unsigned int x=ethhdr_src_x(eth) ;
+ unsigned int y=ethhdr_src_y(eth) ;
+ unsigned int z=ethhdr_src_z(eth) ;
+ eth->h_source[0] = eth->h_dest[0] ; // Replug the item that got taken for DMA sideband
+ dma_tcp->balancer.b[report_transmission_fifo(dma_tcp,x,y,z)].outstanding_counters -= 1 ;
+ }
+ deliver_from_slot(dma_tcp,slot,dma_tcp->conn_for_rcv[counter_index],skb) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) counter_index=0x%02x no skbuff, slot=0x%08x proposals_active=%d",counter_index,slot,proposals_active) ;
+ }
+
+}
+
+/* Abandons a reception counter whose transfer never completed (stuck): the
+ * counter is disabled, its state dumped for diagnosis, the source slot's
+ * active-proposal count decremented, and the partially-filled skb freed
+ * instead of delivered.  Counterpart of handle_empty_recCounter_deliver for
+ * the error path. */
+static void handle_empty_recCounter_flush(dma_tcp_t *dma_tcp, unsigned int counter_index)
+{
+ rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+ struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ;
+ unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ;
+ unsigned int proposals_active=get_proposals_active(rcvdemux,slot) ;
+ unsigned int counter_value = DMA_CounterGetValueNoMsync(dma_tcp->recCounterGroup.counter+counter_index) ;
+ set_proposals_active(rcvdemux,slot,proposals_active-1) ;
+ TRACEN(k_t_request,"(!!!) flushing counter_index=0x%02x skb=%p",counter_index,skb) ;
+ DMA_CounterSetDisableById(&dma_tcp->recCounterGroup,counter_index) ;
+ dma_tcp_show_reception_one(dma_tcp,counter_index,counter_value) ;
+ if( skb)
+ {
+#if defined(AUDIT_FRAME_HEADER)
+ if(memcmp(skb->data,((char *)(all_headers_in_counters+counter_index)),32))
+ {
+ TRACEN(k_t_request,"(!!!) header not as first seen") ;
+ dumpmem(skb->data,sizeof(frame_header_t),"header-now") ;
+ dumpmem(all_headers_in_counters+counter_index,sizeof(frame_header_t),"header-in-propose") ;
+
+ }
+#endif
+ dev_kfree_skb(skb) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) counter_index=0x%02x no skbuff, slot=0x%08x proposals_active=%d",counter_index,slot,proposals_active) ;
+ }
+
+}
+
+/* Recycles a reception counter that has just completed (or been flushed):
+ * pick the next waiting propose — preferring either the balancer queue or the
+ * per-slot 'parked' queue depending on whether source (x0,y0,z0) is over its
+ * flow-control quota — and restart reception with this counter.  If the
+ * deferred-skb tuning is on, the small propose-time skb is replaced by a
+ * full-size one here.  When nothing is waiting, the counter is marked free
+ * and disabled. */
+static void handle_empty_recCounter_reload(dma_tcp_t *dma_tcp, unsigned int counter_index, unsigned int x0, unsigned int y0, unsigned int z0)
+{
+ rcv_t *rcvdemux = &dma_tcp->rcvdemux ;
+ struct sk_buff * skb_next ;
+ unsigned int slot = dma_tcp->slot_for_rcv[counter_index] ;
+ unsigned int proposals_active=get_proposals_active(rcvdemux,slot)+1 ;
+ if( k_counter_flow_control )
+ {
+ /* We're going to get a queued frame, but which queue we try first will depend on whether this source */
+ /* is over quota at the moment */
+ if (proposals_active > count_pending_flow(rcvdemux,slot)+1 && should_park(dma_tcp,proposals_active,x0,y0,z0))
+ {
+ /* If we have a 'queued' frame, take that */
+ skb_next = pending_rcv_skb_dequeue(dma_tcp) ;
+ TRACEN(k_t_general,"skb_next=%p",skb_next) ;
+ if( ! skb_next)
+ {
+ /* Try a 'parked' frame */
+ skb_next=deq_pending_flow(rcvdemux,slot) ;
+ }
+
+ }
+ else
+ {
+ /* If we have a 'parked' frame from the same source, get it moving now */
+ skb_next=deq_pending_flow(rcvdemux,slot) ;
+ TRACEN(k_t_general,"skb_next=%p",skb_next) ;
+ if( ! skb_next)
+ {
+ /* If nothing 'parked', try the general queue */
+ skb_next = pending_rcv_skb_dequeue(dma_tcp) ;
+ }
+
+ }
+ }
+ else
+ {
+ skb_next = pending_rcv_skb_dequeue(dma_tcp) ;
+ }
+ if( skb_next)
+ {
+ /* A request was waiting for a receive counter, which is now available */
+ propose_skb_cb * pskcb = (propose_skb_cb *)skb_next->cb ;
+ unsigned int src_key=pskcb->src_key ;
+ struct ethhdr* eth=(struct ethhdr *)(skb_next->data) ;
+ unsigned int x=ethhdr_src_x(eth) ;
+ unsigned int y=ethhdr_src_y(eth) ;
+ unsigned int z=ethhdr_src_z(eth) ;
+ unsigned int slot=pskcb->slot ;
+ unsigned int conn_id=pskcb->conn_id ;
+ unsigned int pad_head=pskcb->pad_head ;
+ unsigned int tot_len=pskcb->tot_len ;
+ if( defer_skb_for_counter(dma_tcp))
+ {
+ /* Need a new sk_buff; need to set up alignment */
+ /* TODO: shouldn't need alignment */
+ /* TODO: Copy in the data from the old skbuff, so that the DMA doesn't need to resend it */
+ unsigned int allocation_size = (tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+ /* TODO: Defer allocation of the full-size sk_buff until a reception counter is available */
+ struct sk_buff *skb = alloc_skb((allocation_size > 256) ? allocation_size : 256, GFP_ATOMIC) ; /* TODO: refine the size */
+ TRACEN(k_t_general,"skb_next=%p skb=%p allocation_size=%d copying_length=%d src_key=0x%08x slot=0x%08x conn_id=0x%08x pad_head=0x%02x tot_len=0x%04x",skb_next,skb,allocation_size,skb_next->len,src_key,slot,conn_id,pad_head,tot_len) ;
+ if( skb)
+ {
+ if(k_scattergather_diagnostic) stamp_skb(skb,tot_len+sizeof(struct ethhdr)+3*k_torus_skb_alignment) ;
+ skb_reserve(skb, (k_torus_skb_alignment - ((unsigned int)(skb->data)) % k_torus_skb_alignment)+pad_head);
+ memcpy(skb->data,skb_next->data,skb_next->len) ;
+ skb_put(skb,tot_len+sizeof(struct ethhdr)) ;
+ TRACEN(k_t_general,"skb->data=%p skb->len=0x%04x skb_next->data=%p skb_next->len=0x%04x",
+ skb->data, skb->len, skb_next->data, skb_next->len) ;
+ if( k_scattergather_diagnostic) display_skb_structure(skb) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) No memory available for skbuff, torus will jam") ;
+ /* TODO: Could handle this by deferring until memory is available, or by sending a 'negative COP' and having the sender back off */
+ }
+ dev_kfree_skb(skb_next) ;
+ skb_next=skb ;
+ eth=(struct ethhdr *)(skb_next->data) ; // Fix up, 'accept' setup uses this
+
+ }
+ if( skb_next)
+ {
+ receive_skb_using_counter(dma_tcp,skb_next,counter_index,pad_head,slot,conn_id,x,y,z,tot_len,src_key) ;
+ }
+ else
+ {
+ TRACEN(k_t_error,"(E) No memory available for skbuff, torus will jam") ;
+ /* TODO: Could handle this by deferring until memory is available, or by sending a 'negative COP' and having the sender back off */
+ }
+ }
+ else
+ {
+ TRACEN(k_t_general|k_t_scattergather,"Reception counter 0x%02x vacant",counter_index) ;
+ dma_tcp->recCntrInUse[counter_index] = 0 ;
+ dma_tcp->rcv_skbs[counter_index] = NULL ;
+ dma_tcp->qtyFreeRecCounters += 1 ;
+ DMA_CounterSetDisableById(&dma_tcp->recCounterGroup,counter_index) ;
+ }
+
+}
+
+/* Normal completion path for a reception counter that hit zero: read the
+ * source coordinates from the completed frame's Ethernet header, deliver the
+ * skb, then recycle the counter for the next waiting propose. */
+static void handle_empty_recCounter(dma_tcp_t *dma_tcp, unsigned int counter_index)
+{
+ struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ;
+ struct ethhdr *eth=(struct ethhdr *)(skb->data) ;
+ unsigned int x0 = ethhdr_src_x(eth) ;
+ unsigned int y0 = ethhdr_src_y(eth) ;
+ unsigned int z0 = ethhdr_src_z(eth) ;
+ handle_empty_recCounter_deliver(dma_tcp,counter_index) ;
+ handle_empty_recCounter_reload(dma_tcp,counter_index,x0,y0,z0) ;
+}
+
+/* Error path for a reception counter whose transfer stalled: count the
+ * incomplete receive, flush (free) the partial skb, and recycle the counter
+ * for the next waiting propose. */
+static void handle_stuck_recCounter(dma_tcp_t *dma_tcp, unsigned int counter_index)
+{
+ struct sk_buff *skb=dma_tcp->rcv_skbs[counter_index] ;
+ struct ethhdr *eth=(struct ethhdr *)(skb->data) ;
+ unsigned int x0 = ethhdr_src_x(eth) ;
+ unsigned int y0 = ethhdr_src_y(eth) ;
+ unsigned int z0 = ethhdr_src_z(eth) ;
+
+ instrument_flow(dma_tcp,k_receive_incomplete) ;
+ handle_empty_recCounter_flush(dma_tcp,counter_index) ;
+ handle_empty_recCounter_reload(dma_tcp,counter_index,x0,y0,z0) ;
+}
+
+/* Sweeps all reception counters and reclaims any whose skb has been pending
+ * for 3 seconds or more (per the jiffies timestamp taken when the counter was
+ * assigned), treating them as stuck. */
+static void check_stuck_recCounters(dma_tcp_t *dma_tcp)
+{
+ unsigned int x ;
+ int j = jiffies ;
+ for(x=0;x<DMA_NUM_COUNTERS_PER_GROUP;x+=1)
+ {
+ if(dma_tcp->rcv_skbs[x] && (j-dma_tcp->rcv_timestamp[x]) >= 3*HZ )
+ {
+ TRACEN(k_t_request,"(!!!) counter 0x%02x not completed after %d jiffies, freeing it",x,j-dma_tcp->rcv_timestamp[x]) ;
+ handle_stuck_recCounter(dma_tcp,x) ;
+ }
+ }
+}
+
+/* Called when the reception FIFO drains: services every reception counter
+ * whose hit-zero bit is set.  Each 32-bit status word is processed from the
+ * highest-order set bit down (bit 31 of word0 = counter 0, word1 = counters
+ * 32..63), clearing each bit as its counter is handled.  Roughly once per
+ * second it also sweeps for stuck counters — valid only under eager delivery,
+ * hence the deliver_eagerly() guard. */
+void bgp_dma_tcp_empty_fifo_callback(void)
+{
+ dma_tcp_t *dma_tcp = &dma_tcp_state ;
+ unsigned int word0 , word1 ;
+ DMA_CounterGetAllHitZero(&dma_tcp->recCounterGroup, &word0, &word1) ;
+ if( word0 != 0 )
+ {
+ DMA_CounterGroupClearHitZero(&dma_tcp->recCounterGroup, 0, word0) ;
+ TRACEN(k_t_general,"recCounterGroup word0=0x%08x",word0) ;
+ do {
+ unsigned int counter_index=32-fls(word0) ; /* Find the highest-order bit that is set */
+ word0 &= (0x7fffffff >> counter_index) ; /* Clear it */
+ handle_empty_recCounter(dma_tcp,counter_index) ;
+ } while ( word0 != 0) ;
+ }
+ if( word1 != 0)
+ {
+ DMA_CounterGroupClearHitZero(&dma_tcp->recCounterGroup, 1, word1) ;
+ TRACEN(k_t_general,"recCounterGroup word1=0x%08x",word1) ;
+ do {
+ unsigned int counter_index=32-fls(word1) ; /* Find the highest-order bit that is set */
+ word1 &= (0x7fffffff >> counter_index) ; /* Clear it */
+ handle_empty_recCounter(dma_tcp,32+counter_index) ;
+ } while ( word1 != 0) ;
+ }
+ /* 'clear orphaned reception counters' only works correctly if we are doing eager delivery */
+ if( deliver_eagerly(dma_tcp))
+ {
+ int checked_time = dma_tcp->rcv_checked_time ;
+ int j = jiffies ;
+ int elapsed = j - checked_time ;
+ if( elapsed > HZ)
+ {
+ dma_tcp->rcv_checked_time = j ;
+ check_stuck_recCounters(dma_tcp) ;
+ }
+
+ }
+
+
+}
+
+/* NOTE(review): one slot per DMA counter; not referenced in this section --
+ * presumably a per-counter copy statistic, confirm at the use sites. */
+int bgp_dma_tcp_counter_copies[DMA_NUM_COUNTERS_PER_GROUP] ;
+
+
+/* Inject a single memory-FIFO descriptor ("taxi" path) carrying 'length'
+ * bytes at 'address' to torus node (x,y,z).  The buffer is DMA-mapped for
+ * device reads, a torus mem-FIFO descriptor is built, and the descriptor is
+ * pushed into the software injection FIFO selected by
+ * (my_injection_group, desired_fifo).  Always returns 1 (one descriptor
+ * injected); ret1/ret2 are only traced, not acted upon.
+ */
+static inline int inject_into_dma_taxi(dma_tcp_t *dma_tcp, void * address, unsigned int length, unsigned int x, unsigned int y, unsigned int z, unsigned int my_injection_group, unsigned int desired_fifo, unsigned int proto, unsigned int SW_Arg )
+ {
+ dma_addr_t dataAddr ;
+ DMA_InjDescriptor_t desc;
+ int ret1, ret2 ;
+ TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo);
+/* TRACEN(k_t_scattergather,"injecting, length=0x%04x my_injection_group=%d desired_fifo=%d",length,my_injection_group,desired_fifo) ; */
+ dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+ ret1 = DMA_TorusMemFifoDescriptor( &desc,
+ x, y, z,
+ k_ReceptionFifoGroup, /* recv fifo grp id */
+ 0, /* hints */
+ virtual_channel(dma_tcp,k_VC_anyway), /* go whichver way it wants */
+ SW_Arg, /* softw arg */
+ proto, /* function id */
+ k_InjectionCounterGroup, /* inj cntr group id */
+ k_injCounterId, /* inj counter id */
+ dataAddr, /* send address */
+ length /* msg len */
+ );
+
+
+ DMA_DescriptorSetPutOffset(&desc,-length) ; /* For 'memory FIFO packets', the put offset has no hardware use. Set it to indicate the message (fragment) length */
+ ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+ dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+ &desc );
+ TRACEN(k_t_scattergather , "tgt=[%d %d %d] length=0x%04x injfifo[%d %02x]\n",
+ x,y,z,length,
+ my_injection_group,desired_fifo ) ;
+ TRACEN(k_t_general , "(<) ret1=%d ret2=%d",ret1, ret2);
+ return 1 ;
+ }
+
+
+
+/* The injectors are currently set up so that each 'software FIFO' pushes to a single (different) 'hardware FIFO' */
+/* This isn't needed for 'adaptive'; things could be rearranged for all 'software FIFOs' to have access to all 'hardware FIFOs' */
+/* Virtual channel used by the adaptive-routing send paths below. */
+enum {
+ k_my_vc_for_adaptive = k_VC_anyway
+/* Diagnostically flip it to 'deterministic' ... */
+/* k_my_vc_for_adaptive = k_VC_ordering */
+};
+/* Build and inject one adaptive-routed memory-FIFO descriptor for a frame
+ * of 'length' bytes at 'address', destined for (x,y,z).  The descriptor's
+ * put-offset (unused by hardware for mem-FIFO packets) is packed as:
+ *   bits 31..25 conn_id, bits 24..16 packetcount, low 16 bits -length
+ *   rounded down to 16 bytes -- decoded by the receive actor.
+ * Note: midpacketcount underflows (unsigned) when
+ * length <= k_injection_packet_size, but in that case packetcount takes
+ * the '1' arm of the ternary and midpacketcount is never used.
+ * Always returns 1 (one descriptor injected).
+ */
+static inline int inject_into_dma_adaptive(dma_tcp_t *dma_tcp,
+ void * address,
+ unsigned int length,
+ unsigned int x, unsigned int y, unsigned int z,
+ unsigned int my_injection_group,
+ unsigned int desired_fifo,
+ unsigned int proto,
+ unsigned int SW_Arg,
+ unsigned int conn_id )
+ {
+ dma_addr_t dataAddr ;
+ DMA_InjDescriptor_t desc;
+ int ret1, ret2 __attribute((unused));
+ unsigned int firstpacketlength = ( length > k_injection_packet_size) ? k_injection_packet_size : length ;
+ unsigned int midpacketcount = (length-(k_injection_packet_size+1)) / k_injection_packet_size ;
+ unsigned int packetcount = (length > k_injection_packet_size) ? (midpacketcount+2) : 1 ;
+ int PutOffset = (conn_id << 25) | (packetcount << 16) | ((-length) & 0xfff0) ;
+ TRACEN(k_t_general , "(>) injecting address=%p length=0x%08x x=%d y=%d z=%d my_injection_group=%d desired_fifo=%d",address,length,x,y,z,my_injection_group,desired_fifo);
+ dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+ if( length >= 10000)
+ {
+ TRACEN(k_t_request,"address=%p length=0x%08x dataAddr=0x%08llx",address,length,dataAddr) ;
+ }
+
+/* First injection is 'start of frame/fragment' */
+ ret1 = DMA_TorusMemFifoDescriptor( &desc,
+ x, y, z,
+ k_ReceptionFifoGroup, /* recv fifo grp id */
+ 0, /* hints */
+ virtual_channel(dma_tcp,k_my_vc_for_adaptive), /* vc - adaptive */
+ SW_Arg, /* softw arg */
+ proto, /* function id */
+ k_InjectionCounterGroup, /* inj cntr group id */
+ k_injCounterId, /* inj counter id */
+ dataAddr, /* send address */
+ packetcount*firstpacketlength /* msg len */
+ );
+
+
+ DMA_DescriptorSetPutOffset(&desc,PutOffset) ; /* For 'memory FIFO packets', the put offset has no hardware use. Set it to pass required data to receive actor */
+ ret2 = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+ dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+ &desc );
+ TRACEN(k_t_scattergather ,"tgt=[%d %d %d] length=0x%04x injfifo[%d %02x] conn_id=0x%02x\n",
+ x,y,z,length,
+ my_injection_group,desired_fifo,conn_id ) ;
+ TRACEN(k_t_general , "proto=%d firstpacketlength=%d ret1=%d ret2=%d",proto,firstpacketlength,ret1, ret2);
+
+ return 1 ;
+
+ }
+
+/* Build (but do not inject) an adaptive-routed memory-FIFO descriptor into
+ * '*desc' for a frame of 'length' bytes at 'address' destined for (x,y,z).
+ * The buffer is DMA-mapped here; injection happens later via
+ * inject_dma_descriptor(s)_adaptive().  The put-offset encoding matches
+ * inject_into_dma_adaptive(): conn_id<<25 | packetcount<<16 | (-length)&0xfff0.
+ * midpacketcount's unsigned underflow for short frames is benign (unused in
+ * that case, as packetcount falls back to 1).
+ */
+static inline void create_dma_descriptor_adaptive(dma_tcp_t *dma_tcp,
+ void * address,
+ unsigned int length,
+ unsigned int x, unsigned int y, unsigned int z,
+ unsigned int proto,
+ unsigned int SW_Arg,
+ unsigned int conn_id,
+ DMA_InjDescriptor_t *desc)
+ {
+ dma_addr_t dataAddr ;
+ int ret1 __attribute__((unused));
+ unsigned int firstpacketlength = ( length > k_injection_packet_size) ? k_injection_packet_size : length ;
+ unsigned int midpacketcount = (length-(k_injection_packet_size+1)) / k_injection_packet_size ;
+ unsigned int packetcount = (length > k_injection_packet_size) ? (midpacketcount+2) : 1 ;
+ int PutOffset = (conn_id << 25) | (packetcount << 16) | ((-length) & 0xfff0) ;
+ TRACEN(k_t_general , "(>) address=%p length=0x%08x x=%d y=%d z=%d proto=%d SW_Arg=0x%08x desc=%p",address,length,x,y,z,proto,SW_Arg,desc);
+ dataAddr = dma_map_single(NULL, address, length, DMA_TO_DEVICE);
+ if( length >= 10000)
+ {
+ TRACEN(k_t_request,"address=%p length=0x%08x dataAddr=0x%08llx",address,length,dataAddr) ;
+ }
+
+/* First injection is 'start of frame/fragment' */
+ ret1 = DMA_TorusMemFifoDescriptor( desc,
+ x, y, z,
+ k_ReceptionFifoGroup, /* recv fifo grp id */
+ 0, /* hints */
+ virtual_channel(dma_tcp,k_my_vc_for_adaptive), /* vc - adaptive */
+ SW_Arg, /* softw arg */
+ proto, /* function id */
+ k_InjectionCounterGroup, /* inj cntr group id */
+ k_injCounterId, /* inj counter id */
+ dataAddr, /* send address */
+ packetcount*firstpacketlength /* msg len */
+ );
+
+ DMA_DescriptorSetPutOffset(desc,PutOffset) ; /* For 'memory FIFO packets', the put offset has no hardware use. Set it to pass required data to receive actor */
+ TRACEN(k_t_general , "(<) firstpacketlength=%d ret1=%d",firstpacketlength,ret1);
+
+ }
+
+/* Push one previously-built descriptor into the software injection FIFO
+ * selected by (my_injection_group, desired_fifo).  Always returns 1; the
+ * wrapped injection call's return value is traced only.
+ */
+static inline int inject_dma_descriptor_adaptive(dma_tcp_t *dma_tcp,
+ unsigned int my_injection_group,
+ unsigned int desired_fifo,
+ DMA_InjDescriptor_t *desc)
+ {
+ int rc __attribute__((unused));
+ unsigned int fifo_index = my_injection_group*k_injecting_directions+desired_fifo ;
+ TRACEN(k_t_general|k_t_sgdiag , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p",my_injection_group,desired_fifo,desc);
+ TRACEN(k_t_sgdiag,"injecting 0x%04x bytes",desc->msg_length) ;
+ rc = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+ dma_tcp->injFifoFramesIds[fifo_index],
+ desc );
+
+ TRACEN(k_t_general , "(<) ret=%d",rc);
+ return 1 ;
+
+ }
+
+/* Push a vector of 'count' descriptor pointers into the software injection
+ * FIFO selected by (my_injection_group, desired_fifo).  If the first
+ * descriptor has a zero message length it is skipped (only the first is
+ * checked; later zero-length entries are injected as zeroed descriptors --
+ * see inject_scattergather()).  The injection counter is re-opened to its
+ * maximum after the injection.  A short injection (ret != count) is only
+ * traced as an error; the function still returns 'count'.
+ */
+static inline int inject_dma_descriptors_adaptive(dma_tcp_t *dma_tcp,
+ unsigned int my_injection_group,
+ unsigned int desired_fifo,
+ DMA_InjDescriptor_t **desc,
+ unsigned int count )
+ {
+ int ret __attribute__((unused));
+ int r2 __attribute__((unused));
+ unsigned int fifo_index = my_injection_group*k_injecting_directions+desired_fifo ;
+ TRACEN(k_t_general|k_t_sgdiag , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p count=%d fifo_id=0x%02x",
+ my_injection_group,desired_fifo,desc,count, dma_tcp->injFifoFramesIds[fifo_index]);
+ if( 0 == desc[0]->msg_length)
+ {
+ TRACEN(k_t_general,"(I) msg_length[0] zero, injection skipped") ;
+ desc += 1 ;
+ count -= 1 ;
+ }
+ ret = DMA_InjFifoInjectDescriptorsById( &dma_tcp->injFifoGroupFrames,
+ dma_tcp->injFifoFramesIds[fifo_index],
+ count,
+ desc );
+ /* Keep the shared injection counter wide open (re-arm to max) */
+ r2=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId, 0xffffffff );
+ if( ret != count)
+ {
+ TRACEN(k_t_error,"(!!!) count=%d ret=%d",count,ret) ;
+ }
+
+ TRACEN(k_t_general , "(<) count=%d fifo_id=0x%02x",
+ count,dma_tcp->injFifoFramesIds[fifo_index]);
+
+ return count ;
+ }
+
+/* Don't actually need this; the length is precise anyway, we just may waste some cells in the last packet */
+/* NOTE(review): dead code kept for reference -- compiled out by '#if 0'. */
+#if 0
+static inline int inject_dma_descriptor_adaptive_precise_length(dma_tcp_t *dma_tcp,
+ unsigned int my_injection_group,
+ unsigned int desired_fifo,
+ DMA_InjDescriptor_t *desc)
+ {
+ unsigned int size=desc->msg_length ;
+ unsigned int full_frame_count=size / k_torus_link_payload_size ;
+ unsigned int full_frame_size = full_frame_count * k_torus_link_payload_size ;
+ unsigned int trailing_frame_size = size - full_frame_size ;
+ unsigned int rc=0 ;
+ if(0 == trailing_frame_size || 0 == full_frame_count) // These cases were already 'precise'
+ {
+ int ret __attribute__((unused));
+ TRACEN(k_t_general , "(>) injecting my_injection_group=%d desired_fifo=%d desc=%p",my_injection_group,desired_fifo,desc);
+ ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+ dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+ desc );
+ TRACEN(k_t_general , "(<) ret=%d",ret);
+ return 1 ;
+ }
+ else
+ {
+ /* Need to split into 2 injections in order not to transmit extra cells */
+ int ret __attribute__((unused));
+ desc->msg_length=full_frame_size ;
+ ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+ dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+ desc );
+ desc->msg_length=trailing_frame_size ;
+ desc->base_offset += full_frame_size ;
+ desc->hwHdr.Chunks = DMA_PacketChunks(trailing_frame_size) - 1 ;
+ ret = wrapped_DMA_InjFifoInjectDescriptorById( &dma_tcp->injFifoGroupFrames,
+ dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo],
+ desc );
+ return 2 ;
+
+
+
+ }
+
+ }
+#endif
+
+
+/* Diagnostic helper: if the socket owning this skb has already retransmitted,
+ * trace the skb, its destination address and the retransmit state.
+ * NOTE(review): daddr is printed by raw byte shifts; this prints the dotted
+ * quad correctly on big-endian PowerPC only.
+ */
+static void analyse_skb(struct sk_buff *skb) __attribute__ ((unused)) ;
+static void analyse_skb(struct sk_buff *skb)
+ {
+ struct sock *sk=skb->sk ;
+ struct inet_sock *inet = inet_sk(sk);
+ struct inet_connection_sock *icsk = inet_csk(sk);
+ unsigned int daddr=inet->daddr ;
+ unsigned int flags = TCP_SKB_CB(skb)->flags ;
+ if(icsk->icsk_retransmits > 0 )
+ {
+ TRACEN(k_t_congestion,"(I) sk=%p skb=%p data=%p len=%d flags=0x%02x ip=%u.%u.%u.%u icsk_retransmits=%d icsk_rto=%d resending (BGP)",
+ sk, skb, skb->data, skb->len, flags,
+ daddr>>24, (daddr>>16)&0xff,(daddr>>8)&0xff,daddr&0xff,
+ icsk->icsk_retransmits, icsk->icsk_rto ) ;
+ }
+ }
+
+/* Nonzero when (x,y,z) equals this node's own torus location 't' --
+ * the DMA hardware does not support sending to self.
+ */
+static inline int selfsend(const torusLocation_t * t, unsigned int x, unsigned int y, unsigned int z)
+{
+ return (t->coordinate[0] == x) && (t->coordinate[1] == y) && (t->coordinate[2] == z) ;
+}
+
+/* Nonzero when (x,y,z) lies outside the partition extent 't' -- valid
+ * coordinates are strictly less than the extent in every dimension.
+ */
+static inline int offfabric(const torusLocation_t * t, unsigned int x, unsigned int y, unsigned int z)
+{
+ return (x >= t->coordinate[0]) || (y >= t->coordinate[1]) || (z >= t->coordinate[2]) ;
+}
+/* Zero the per-direction 'in use' flags.  Note the inclusive bound: this
+ * clears k_injecting_directions+1 entries, covering the extra summary slot
+ * at index k_injecting_directions used by record_dir_in_use().
+ */
+static inline void clear_dir_in_use(unsigned char * direction_is_in_use)
+{
+ int slot ;
+ for(slot=0;slot<=k_injecting_directions;slot+=1)
+ {
+ direction_is_in_use[slot] = 0 ;
+ }
+}
+
+/* Accumulate this pass's per-direction 'in use' flags into the running
+ * statistics, and bump the pass counter kept in the extra slot at index
+ * k_injecting_directions.
+ */
+static inline void record_dir_in_use(dma_tcp_t * dma_tcp,unsigned char * direction_is_in_use)
+{
+ int dir ;
+ for(dir=0;dir<k_injecting_directions;dir+=1)
+ {
+ dma_tcp->tx_in_use_count[dir] += direction_is_in_use[dir] ;
+ }
+ dma_tcp->tx_in_use_count[k_injecting_directions] += 1 ;
+}
+
+/* Routine to free all the skbuffs that control data which has left the node */
+static void dma_tcp_frames_transmission_free_skb(unsigned long parm)
+ {
+ dma_tcp_t *dma_tcp = &dma_tcp_state ;
+ unsigned int core ;
+ unsigned int total_injection_used = 0 ;
+ unsigned char direction_is_in_use[k_skb_controlling_directions] ;
+ clear_dir_in_use(direction_is_in_use) ;
+#if defined(TRACK_LIFETIME_IN_FIFO)
+ unsigned long long now=get_powerpc_tb() ;
+#endif
+ for( core=0 ; core<k_injecting_cores; core += 1)
+ {
+ unsigned int desired_fifo ;
+ for(desired_fifo=0; desired_fifo<k_skb_controlling_directions; desired_fifo += 1 )
+ {
+ spinlock_t * injectionLock = &dma_tcp->dirInjectionLock[core*k_injecting_directions+desired_fifo] ;
+ idma_direction_t * buffer = dma_tcp->idma.idma_core[core].idma_direction+desired_fifo ;
+ unsigned int fifo_initial_head = dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].fifo_initial_head ;
+ unsigned int bhx = buffer->buffer_head_index ;
+ unsigned int btx = buffer->buffer_tail_index ; /* This indexes the oldest skbuff that might still be pending send by the DMA unit */
+ unsigned int fifo_current_head =
+ (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+ unsigned int fifo_current_tail =
+ (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+ unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
+ unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
+ unsigned int current_injection_used=packet_mod(tailx-headx) ;
+ int skql2 = packet_mod(bhx-btx) ;
+ if( 0 != current_injection_used ) direction_is_in_use[desired_fifo] = 1 ;
+ if( skql2 != current_injection_used)
+ {
+ skb_group_t skb_group ;
+
+ skb_group_init(&skb_group) ;
+ if( spin_trylock(injectionLock))
+ {
+ unsigned int bhx = buffer->buffer_head_index ;
+ unsigned int btx = buffer->buffer_tail_index ; /* This indexes the oldest skbuff that might still be pending send by the DMA unit */
+ unsigned int fifo_current_head =
+ (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+ unsigned int fifo_current_tail =
+ (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+desired_fifo]) ;
+ unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ;
+ unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
+ unsigned int current_injection_used=packet_mod(tailx-headx) ;
+ int skql2 = packet_mod(bhx-btx) ;
+ int count_needing_freeing = skql2-current_injection_used ;
+ int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ;
+ TRACEN(k_t_detail,"current_injection_used=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,skql2,count_needing_freeing,count_to_free);
+ skb_group_queue(&skb_group,dma_tcp->idma.idma_core[core].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free
+#if defined(TRACK_LIFETIME_IN_FIFO)
+ , core, desired_fifo, now
+#endif
+ ) ;
+ btx = packet_mod(btx+count_to_free) ;
+ buffer->buffer_tail_index = btx ;
+ TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index);
+ total_injection_used += current_injection_used ;
+
+ spin_unlock(injectionLock) ;
+ skb_group_free(&skb_group) ;
+ }
+ else
+ {
+ total_injection_used += current_injection_used ;
+ }
+ }
+ }
+ }
+ TRACEN(k_t_detail,"total_injection_used=%d",total_injection_used) ;
+ record_dir_in_use(dma_tcp,direction_is_in_use) ;
+ if( total_injection_used > 0 )
+ {
+ mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ;
+ }
+ }
+
+
+/* Diagnostic dump of an skb's layout: head/data/tail/end pointers, lengths,
+ * up to 256 bytes of the linear head, and the offset/size of every
+ * scatter-gather fragment.
+ */
+static void display_skb_structure(struct sk_buff *skb)
+{
+ int f ;
+ unsigned int headlen=skb_headlen(skb) ;
+ TRACEN(k_t_request, "sk_buff(head=%p data=%p tail=%p end=%p len=0x%08x data_len=0x%08x nr_frags=%d",
+ skb->head, skb->data, skb->tail, skb->end, skb->len, skb->data_len, skb_shinfo(skb)->nr_frags) ;
+ dumpmem(skb->data,(headlen > 256) ? 256 : headlen,"skb head") ;
+ for(f=0; f<skb_shinfo(skb)->nr_frags; f+=1)
+ {
+ struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f];
+ unsigned int page_offset=frag->page_offset ;
+ unsigned int size = frag->size ;
+ TRACEN(k_t_request, " frags[%d](page_offset=0x%08x size=0x%08x)",
+ f,page_offset,size) ;
+ }
+}
+
+/* Unsigned minimum of two values. */
+static inline unsigned int imin2(unsigned int a, unsigned int b)
+{
+ return (a<b) ? a : b ;
+}
+#if defined(USE_SKB_TO_SKB)
+/* Rendezvous ("propose/accept") send path for skb-to-skb DMA.  Extracts the
+ * target (x,y,z) from the ethernet destination, validates it, registers the
+ * skb against a per-destination connection id, builds a 'propose' descriptor
+ * (carrying up to one packet of head data, or 48 bytes for linear skbs),
+ * and hands off to bgp_dma_tcp_s_and_f_frames_prepared().  The skb is not
+ * freed on success (free_when_done = 0) -- it is kept until the transfer is
+ * accepted; on an invalid target it is freed and the function returns.
+ */
+static void bgp_dma_tcp_s_and_f_frames_dma(
+ dma_tcp_t *dma_tcp,
+ struct sk_buff *skb
+ )
+{
+ frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ;
+ struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+ unsigned int x = eth->h_dest[3] ;
+ unsigned int y = eth->h_dest[4] ;
+ unsigned int z = eth->h_dest[5] ;
+ unsigned int payload_address = (unsigned int)(skb->data) ;
+ unsigned int aligned_payload_address = payload_address & (~ 0x0f) ; /* 16-byte align for DMA */
+ unsigned int pad_head = payload_address & 0x0f ;
+ unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ; /* Everything to a given node will go on the same stream, no point coding injection group in */
+ unsigned int headlen = skb_headlen(skb) ;
+ TRACEN(k_t_general ,"(>)skb=%p (%02x,%02x,%02x) data=%p length=%d data_len=%d headlen=%d", skb,x,y,z,skb->data, skb->len, skb->data_len,headlen);
+ dumpframe(skb->data, skb_headlen(skb), "skbuff to send") ;
+
+ TRACEN(k_t_general, "(=)(I) testdma: Sending to (%d,%d,%d)",
+ x, y, z );
+
+ /* Make sure we're not trying to send off the partition or to self */
+ if( k_verify_target)
+ {
+ if( offfabric(&(dma_tcp->extent),x,y,z))
+ {
+ TRACEN(k_t_error, "(W) Target (%d,%d,%d) not in range",x,y,z) ;
+ WARN_ON(1) ;
+ dev_kfree_skb(skb) ;
+ return ;
+ }
+ if( selfsend(&(dma_tcp->location),x,y,z))
+ {
+ TRACEN(k_t_error, "(W) Self-send not supported by hardware (%d %d %d)",x,y,z) ;
+ WARN_ON(1) ;
+ dev_kfree_skb(skb) ;
+ return ;
+ }
+ }
+
+ TRACEN(k_t_protocol,"(=)sending packet to (%02x,%02x,%02x) length=%d",
+ x,y,z,skb->len) ;
+
+ /* copy descriptor into the inj fifo */
+ {
+ /* Linearise (x,y,z) into a per-destination key for the tx mux */
+ unsigned int dest_key = x*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2]
+ +y*dma_tcp->extent.coordinate[2]
+ +z ;
+ unsigned int conn_id = take_tx_conn_id(&dma_tcp->tx_mux,dest_key) ;
+ atomic_inc(&dma_tcp->framesProposed) ;
+ TRACEN(k_t_general,"Saving skb=%p for dest_key=0x%08x conn_id=0x%08x",skb,dest_key,conn_id) ;
+ set_tx_skb(&dma_tcp->tx_mux,dest_key,conn_id,skb) ;
+ ficb->free_when_done = 0 ;
+
+#if defined(AUDIT_HEADLEN)
+ {
+ struct iphdr *iph = (struct iphdr *)(eth+1) ;
+ ficb->tot_len = iph->tot_len ;
+ }
+#endif
+ {
+ /* If we have a 'scatter-gather' skb, try to put the head into the 'propose' packet */
+ unsigned int nr_frags = skb_shinfo(skb)->nr_frags ;
+ unsigned int propose_length = (nr_frags == 0 ) ? 48 : imin2(pad_head+headlen,k_torus_link_payload_size) ;
+ eth->h_source[0] = propose_length ; // Use a byte on-the-side to say how much data was actually sent
+ TRACEN(k_t_general,"nr_frags=%d propose_length=%d",nr_frags,propose_length) ;
+ create_dma_descriptor_propose_accept(dma_tcp,
+ (void *)aligned_payload_address,
+ propose_length,
+ x,y, z,
+ dma_tcp->proto_transfer_propose,
+ src_key,
+ conn_id,
+ 0,
+ &ficb->desc,
+ propose_length
+ ) ;
+ }
+ }
+ instrument_flow(dma_tcp,k_send_propose_rpc) ;
+ bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp, skb, 0, k_cattle_class) ;
+}
+#endif
+
+/* Inject a scatter-gather skb: the pre-built descriptor in the skb's control
+ * block (ficb->desc) carries the linear head, and one direct-put descriptor
+ * is built per page fragment, each targeting consecutive offsets of the
+ * remote reception counter 'rctr'.  All descriptors are then injected as a
+ * batch via inject_dma_descriptors_adaptive(); returns the batch count.
+ * Zero-length fragments become zeroed descriptors (traced, counted).
+ */
+static int inject_scattergather(
+ dma_tcp_t *dma_tcp,
+ struct sk_buff *skb,
+ unsigned int my_injection_group,
+ unsigned int desired_fifo
+)
+{
+ frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ;
+ unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+ struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+ unsigned int aligned_payload_length = ficb->desc.msg_length ;
+ unsigned int x=ficb->desc.hwHdr.X ;
+ unsigned int y=ficb->desc.hwHdr.Y ;
+ unsigned int z=ficb->desc.hwHdr.Z ;
+ unsigned int f ;
+ /* eth->h_source[0] carries the 'propose length' byte set on the send side */
+ unsigned int dest_offset=k_abbreviate_headlen ? (aligned_payload_length+eth->h_source[0]): aligned_payload_length ;
+ unsigned int base_offset=ficb->desc.base_offset ;
+ unsigned int rctr=ficb->desc.hwHdr.rDMA_Counter % DMA_NUM_COUNTERS_PER_GROUP ;
+ struct iphdr *iph = (struct iphdr *)(eth+1) ;
+ unsigned int daddr=iph->daddr ;
+
+ DMA_InjDescriptor_t descVector[MAX_SKB_FRAGS] ;
+ DMA_InjDescriptor_t * descPtr[1+MAX_SKB_FRAGS] ;
+ unsigned int total_inj_length = ficb->desc.msg_length ;
+ TRACEN(k_t_scattergather|k_t_sgdiag,"injecting, base_offset=0x%04x length=0x%04x my_injection_group=%d desired_fifo=%d dest_offset=0x%04x",
+ base_offset,ficb->desc.msg_length,my_injection_group,desired_fifo, dest_offset) ;
+
+ /* Prepare the initial not-fragment part */
+ descPtr[0] = &ficb->desc ;
+ /* scatter-gather fragments to be pushed out here */
+ for(f=0;f<nr_frags;f+=1)
+ {
+ struct skb_frag_struct* frag = &skb_shinfo(skb)->frags[f];
+ struct page *page = frag->page ;
+ unsigned int page_offset=frag->page_offset ;
+ unsigned int size = frag->size ;
+ dma_addr_t buffAddr = dma_map_page(NULL, page, page_offset, size, DMA_TO_DEVICE);
+ TRACEN(k_t_scattergather|k_t_sgdiag,"f=%d page=%p page_offset=0x%04x size=0x%04x buffAddr=0x%08llx dest_offset=0x%04x",
+ f,page,page_offset,size,buffAddr,dest_offset) ;
+ total_inj_length += size ;
+ if( 0 != size)
+ {
+ create_dma_descriptor_direct_put_offset(dma_tcp,x,y,z,k_injCounterId,rctr,buffAddr,size,descVector+f,dest_offset) ;
+ }
+ else
+ {
+ TRACEN(k_t_request,"(I) frag length zero") ;
+ DMA_ZeroOutDescriptor(descVector+f) ;
+ instrument_flow(dma_tcp,k_fraglength_zero) ;
+ }
+ descPtr[1+f]=descVector+f ;
+ dest_offset += size ;
+
+ }
+ TRACEN(k_t_sgdiag,"Injecting tgt=[%d,%d,%d] length=0x%04x ctr=0x%02x",x,y,z,total_inj_length,rctr) ;
+
+
+ TRACEN(k_t_scattergather ,"tgt=[%d %d %d] daddr=%d.%d.%d.%d tot_len=0x%04x, length=0x%04x headlen=0x%04x data_len=0x%04x dest_offset=0x%04x nr_frags=%d fragsizes[0x%04x 0x%04x 0x%04x] counter=0x%02x injfifo[%d %02x]\n",
+ x,y,z,
+ daddr>>24, (daddr >> 16) & 0xff,(daddr >> 8) & 0xff, daddr & 0xff,iph->tot_len,
+ skb->len,skb_headlen(skb), skb->data_len, dest_offset,
+ nr_frags,skb_shinfo(skb)->frags[0].size,skb_shinfo(skb)->frags[1].size,skb_shinfo(skb)->frags[2].size,rctr,my_injection_group,desired_fifo ) ;
+ /* Sanity check: head must at least contain the eth+ip headers */
+ if( skb_headlen(skb) < sizeof(struct ethhdr)+sizeof(struct iphdr))
+ {
+ TRACEN(k_t_request,"(!!!) length=0x%04x data_len=0x%04x nr_frags=%d fragsizes[0x%04x 0x%04x 0x%04x]",skb->len, skb->data_len, nr_frags,skb_shinfo(skb)->frags[0].size,skb_shinfo(skb)->frags[1].size,skb_shinfo(skb)->frags[2].size) ;
+ display_skb_structure(skb) ;
+ }
+ return inject_dma_descriptors_adaptive(dma_tcp,my_injection_group,desired_fifo,descPtr,1+nr_frags) ;
+
+}
+/* Send-and-free a frame with an already-prepared injection descriptor (which might be DMA-put or FIFO-put) */
+/* Core transmit routine.  Under the per-(group,fifo) injection lock it
+ * checks the software ring for space (needing room for the head plus up to
+ * 3 injections per fragment), injects via the taxi / scatter-gather /
+ * single-descriptor path as appropriate, records the skb for deferred
+ * freeing once its data has left the node, and opportunistically frees skbs
+ * already sent.  If the ring is full the skb is queued (head or tail per
+ * 'queue_at_head') and -1 is returned; otherwise returns the fifo used.
+ * Returns -EINVAL (freeing the skb) for off-partition or self targets.
+ */
+static int bgp_dma_tcp_s_and_f_frames_prepared(
+ dma_tcp_t *dma_tcp,
+ struct sk_buff *skb,
+ unsigned int queue_at_head,
+ unsigned int transport_class
+ )
+ {
+ unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+ unsigned int is_scattergather = (nr_frags > 0 ) ;
+ unsigned int payload_length = (skb -> len) - (skb->data_len) ;
+ unsigned int payload_address = (unsigned int)(skb->data) ;
+ unsigned int aligned_payload_address = payload_address & (~ 0x0f) ;
+ unsigned int pad_head = payload_address & 0x0f ;
+ unsigned int aligned_payload_length = payload_length + pad_head ;
+ #if 1
+ unsigned int use_taxi = 0 ; /* taxi path currently disabled */
+ #else
+ unsigned int use_taxi = (aligned_payload_length<=k_injection_packet_size) && (0 == nr_frags);
+ #endif
+ unsigned long flags ;
+ unsigned int current_injection_used=0xffffffff ; /* sentinel: 'not yet measured' */
+
+ int ret = 0;
+ int ring_ok ;
+
+ int my_injection_group ;
+ skb_group_t skb_group ;
+ frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ;
+ unsigned int x=ficb->desc.hwHdr.X ;
+ unsigned int y=ficb->desc.hwHdr.Y ;
+ unsigned int z=ficb->desc.hwHdr.Z ;
+ unsigned int header_dma_length=ficb->desc.msg_length ; // If this is zero, then we can free the skb as soon as its 'frags' are in software injection fifo
+ TRACEN(k_t_general ,"(>)skb=%p (%02x,%02x,%02x) data=%p length=%d data_len=%d nr_frags=%d", skb,x,y,z,skb->data, skb->len, skb->data_len, nr_frags);
+ if(is_scattergather ) instrument_flow(dma_tcp,k_scattergather) ;
+
+ skb_group_init(&skb_group) ;
+
+ TRACEN(k_t_general, "(=)(I) testdma: Sending to (%d,%d,%d)",
+ x, y, z );
+
+/* Make sure we're not trying to send off the partition or to self */
+ if( k_verify_target)
+ {
+ if( offfabric(&(dma_tcp->extent),x,y,z))
+ {
+ TRACEN(k_t_error, "(W) Target (%d,%d,%d) not in range",x,y,z) ;
+ WARN_ON(1) ;
+ dev_kfree_skb(skb) ;
+ return -EINVAL;
+ }
+ if( selfsend(&(dma_tcp->location),x,y,z))
+ {
+ TRACEN(k_t_error, "(W) Self-send not supported by hardware (%d %d %d)",x,y,z) ;
+ WARN_ON(1) ;
+ dev_kfree_skb(skb) ;
+ return -EINVAL;
+ }
+ }
+ TRACEN(k_t_protocol,"(=)sending packet to (%02x,%02x,%02x) length=%d",
+ x,y,z,skb->len) ;
+
+ /* copy descriptor into the inj fifo */
+ {
+ /* Small single-packet frames of a privileged class get the last fifo; everything else hashes */
+ unsigned int desired_fifo=((transport_class != k_cattle_class) && (aligned_payload_length<=k_injection_packet_size) && (0 == nr_frags)) ? (k_skb_controlling_directions-1) : select_transmission_fifo(dma_tcp,x,y,z) ;
+ my_injection_group=injection_group_hash(dma_tcp,x,y,z) ;
+ spin_lock_irqsave(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
+ {
+ unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ; /* Everything to a given node will go on the same stream, no point coding injection group in */
+ /* Work out which buffer we are going to use for the packet stream */
+ idma_direction_t * buffer = dma_tcp->idma.idma_core[my_injection_group].idma_direction+desired_fifo ;
+ /* Set up the payload */
+ unsigned int bhx = buffer->buffer_head_index ;
+ unsigned int lastx = packet_mod(bhx) ;
+ unsigned int fifo_initial_head = dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].fifo_initial_head ;
+ unsigned int fifo_current_head =
+ (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
+ unsigned int fifo_current_tail =
+ (unsigned int) DMA_InjFifoGetTailById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[my_injection_group*k_injecting_directions+desired_fifo]) ;
+ unsigned int headx = (fifo_current_head-fifo_initial_head) >> 5 ; /* >>5: descriptors are 32 bytes */
+ unsigned int tailx = (fifo_current_tail-fifo_initial_head) >> 5 ;
+ unsigned int injection_count ;
+#if defined(TRACK_LIFETIME_IN_FIFO)
+ unsigned long long now=get_powerpc_tb() ;
+ *(unsigned long long*)(skb->cb) = now ;
+#endif
+ current_injection_used=packet_mod(tailx-headx) ;
+ /* If the network is backing up, we may have to skip out here, */
+ /* so that we don't overwrite unsent data. */
+ TRACEN(k_t_general ,"Runway desired_fifo=%d headx=%d tailx=%d bhx=%d current_injection_used=%d",
+ desired_fifo,headx,tailx,bhx,current_injection_used) ;
+ if( current_injection_used > buffer->injection_high_watermark )
+ {
+ buffer->injection_high_watermark=current_injection_used ; /* Congestion statistic */
+ }
+ {
+ /* Need to have room to inject the in-skbuff data plus all attached 'fragments', each of which may be sent in 3 injections */
+ if( current_injection_used+3*(MAX_SKB_FRAGS+1) < k_injection_packet_count-1)
+ {
+ ring_ok = 1 ;
+ TRACEN(k_t_general,"Runway slot granted") ;
+ }
+ else
+ {
+ ring_ok = 0 ;
+ TRACEN(k_t_congestion,"Runway slot denied tailx=%08x headx=%08x",tailx,headx) ;
+ }
+ }
+ TRACEN(k_t_general ,"Injection my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x nr_frags=%d",
+ my_injection_group, desired_fifo, bhx, headx,tailx,nr_frags
+ ) ;
+ if ( ring_ok )
+ {
+ /* We are going to send something. */
+
+ /* Bump the injection counter. Actually only needs doing once per 4GB or so */
+ ret=DMA_CounterSetValueWideOpenById ( & dma_tcp->injCounterGroup, k_injCounterId, 0xffffffff );
+
+ /* and inject it */
+ if(use_taxi)
+ {
+ injection_count = inject_into_dma_taxi(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,my_injection_group,desired_fifo,
+ dma_tcp->proto_issue_frames_single,src_key) ;
+ }
+ else
+ {
+ if( is_scattergather && 0 != ficb->free_when_done)
+ {
+ injection_count = inject_scattergather(
+ dma_tcp,skb,my_injection_group,desired_fifo
+ ) ;
+ }
+ else
+ {
+ /* Prop, or accept, or unfragmented skbuff */
+ injection_count = inject_dma_descriptor_adaptive(dma_tcp,my_injection_group,desired_fifo,
+ &ficb->desc
+ ) ;
+ }
+
+ }
+ {
+ unsigned int nhx=packet_mod(bhx+injection_count) ;
+ /* Remember where we will be pushing the next injection in */
+ TRACEN(k_t_detail,"Next injection will be at nhx=0x%08x",nhx) ;
+ buffer->buffer_head_index = nhx ;
+ /* Record the skbuff so it can be freed later, after data is DMA'd out */
+ if( ficb->free_when_done && header_dma_length > 0 )
+ {
+ TRACEN(k_t_detail,"Saving skb=%p at [%p] for freeing later",skb,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array+nhx) ;
+ dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array[nhx] = skb ;
+ }
+ }
+ /* hang on to the skbs until they are sent ... */
+ if( current_injection_used != 0xffffffff)
+ {
+ unsigned int btx = buffer->buffer_tail_index ; /* This indexes the oldest skbuff that might still be pending send by the DMA unit */
+ int skql2 = packet_mod(bhx-btx) ;
+ int count_needing_freeing = skql2-current_injection_used ;
+ int count_to_free = ( count_needing_freeing > k_skb_group_count) ? k_skb_group_count : count_needing_freeing ;
+ TRACEN(k_t_detail ,"current_injection_used=%d btx=%d skql2=%d count_needing_freeing=%d count_to_free=%d",current_injection_used,btx,skql2,count_needing_freeing,count_to_free);
+ skb_group_queue(&skb_group,dma_tcp->idma.idma_core[my_injection_group].idma_direction[desired_fifo].idma_skb_array->skb_array,btx,count_to_free
+#if defined(TRACK_LIFETIME_IN_FIFO)
+ , my_injection_group, desired_fifo, now
+#endif
+ ) ;
+ btx = packet_mod(btx+count_to_free) ;
+ buffer->buffer_tail_index = btx ;
+ TRACEN(k_t_detail ,"buffer=%p buffer->buffer_tail_index=%d",buffer,buffer->buffer_tail_index);
+ }
+ }
+ else
+ {
+ TRACEN(k_t_congestion,"Would overrun my_injection_group=%d desired_fifo=%d bhx=0x%08x headx=%08x tailx=%08x lastx=%08x",
+ my_injection_group, desired_fifo, bhx, headx,tailx, lastx
+ ) ;
+ }
+ }
+ spin_unlock_irqrestore(&dma_tcp->dirInjectionLock[my_injection_group*k_injecting_directions+desired_fifo],flags) ;
+ /* Free any completed skbs outside the lock */
+ skb_group_free(&skb_group) ;
+ if( k_async_free ) mod_timer(&dma_tcp->transmission_free_skb_timer, jiffies+1) ;
+ if( 0 == ring_ok )
+ {
+ /* Ring full: park the skb for a later retry */
+ TRACEN(k_t_congestion,"(=)Queuing skb=%p desired_fifo=%d (%u %u %u)", skb,desired_fifo,x,y,z) ;
+ if( queue_at_head)
+ {
+ skb_queue_head(dma_tcp->inj_queue+desired_fifo, skb) ;
+ }
+ else
+ {
+ skb_queue_tail(dma_tcp->inj_queue+desired_fifo, skb) ;
+ }
+ }
+ else
+ {
+ if( 0 == header_dma_length)
+ {
+ TRACEN(k_t_general,"Freeing skb=%p, its header has left the node",skb) ;
+ dev_kfree_skb(skb) ;
+ }
+ }
+
+
+
+ TRACEN(k_t_general ,"(<) ring_ok=%d desired_fifo=%d",ring_ok,desired_fifo);
+
+ return ring_ok ? desired_fifo : -1 ;
+ }
+
+ }
+
+/* ... return 'direction' if we sent the packet, '-1' if we queued it */
+/* Top-level eager send: builds an adaptive-routed descriptor for the skb
+ * (target taken from the ethernet destination bytes 3..5, connection id
+ * from the per-destination tx mux), optionally verifies that the skb's
+ * counted length matches its IP header (dropping mismatched skbs), then
+ * hands off to bgp_dma_tcp_s_and_f_frames_prepared().
+ */
+static int bgp_dma_tcp_s_and_f_frames(
+ dma_tcp_t *dma_tcp,
+ struct sk_buff *skb,
+ unsigned int queue_at_head
+ )
+{
+#if defined(USE_ADAPTIVE_ROUTING)
+ struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+ unsigned int x = eth->h_dest[3] ;
+ unsigned int y = eth->h_dest[4] ;
+ unsigned int z = eth->h_dest[5] ;
+ unsigned int payload_length = (skb -> len) - (skb->data_len) ;
+ unsigned int payload_address = (unsigned int)(skb->data) ;
+ unsigned int aligned_payload_address = payload_address & (~ 0x0f) ; /* 16-byte align for DMA */
+ unsigned int pad_head = payload_address & 0x0f ;
+ unsigned int src_key = (dma_tcp->src_key << 4) | pad_head ; /* Everything to a given node will go on the same stream, no point coding injection group in */
+ unsigned int aligned_payload_length = payload_length + pad_head ;
+ frame_injection_cb * ficb = (frame_injection_cb *) skb->cb ;
+
+ /* Linearise (x,y,z) into a per-destination key for the tx mux */
+ unsigned int dest_key = x*dma_tcp->extent.coordinate[1]*dma_tcp->extent.coordinate[2]
+ +y*dma_tcp->extent.coordinate[2]
+ +z ;
+ unsigned int conn_id = take_tx_conn_id(&dma_tcp->tx_mux,dest_key) ;
+ instrument_flow(dma_tcp,k_send_eager) ;
+ ficb->free_when_done = 1 ;
+
+ if(TRACING(k_t_sgdiag))
+ {
+ diag_skb_structure(skb) ;
+ }
+ create_dma_descriptor_adaptive(dma_tcp,(void *)aligned_payload_address,aligned_payload_length,x,y,z,
+ dma_tcp->proto_issue_frames_adaptive,src_key,conn_id, &ficb->desc
+ ) ;
+
+#endif
+ /* Optional consistency check: skb's counted length vs. IP total length */
+ if( k_verify_ctlen)
+ {
+ unsigned int ctlen = counted_length(skb) ;
+ struct ethhdr *eth = (struct ethhdr *)(skb->data) ;
+ struct iphdr *iph = (struct iphdr *)(eth+1) ;
+ if( ctlen != iph->tot_len + sizeof(struct ethhdr))
+ {
+ TRACEN(k_t_error,"(E) Counted length mismatch, skb=%p, counted_length=0x%04x, tot_len=0x%04x",skb,ctlen,iph->tot_len ) ;
+ display_skb_structure(skb) ;
+ display_iphdr(iph) ;
+ dev_kfree_skb(skb) ; // It would cause trouble later, to try and send it. So drop it.
+ instrument_flow(dma_tcp,k_counted_length_mismatch) ;
+ return 0 ; // Not really 'direction 0', but this will not cause the caller a problem.
+ }
+ }
+
+ return bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,skb,queue_at_head, 0) ;
+}
+
+/* Try to clear a pending skbuff queue into the mem-fifo */
+/* return 0 if queue cleared */
+/* -1 if the queue cannot be cleared because the FIFO gets full */
+static int bgp_dma_tcp_try_to_clear_queue(dma_tcp_t *dma_tcp, unsigned int direction) noinline ;
+static int bgp_dma_tcp_try_to_clear_queue(dma_tcp_t *dma_tcp, unsigned int direction)
+  {
+    struct sk_buff_head *skq = dma_tcp->inj_queue+direction ;
+    TRACEN(k_t_general,"(>) direction=%u",direction );
+    if( ! skb_queue_empty(skq))
+      {
+        /* We sent something, and there is a pending list which we might be able to send as well */
+        /* Drain until the queue empties or the injection FIFO reports congestion */
+        for(;;)
+          {
+            struct sk_buff * askb = skb_dequeue(skq) ;
+            if( askb)
+              {
+                TRACEN(k_t_congestion,"(=)Dequeuing dir=%d askb=%p length=%u", direction, askb,askb->len) ;
+                {
+                  /* queue_at_head=1: if injection is congested the skb is put
+                   * back at the head of this same queue, preserving order */
+                  int arc= bgp_dma_tcp_s_and_f_frames_prepared(dma_tcp,askb,1,k_cattle_class) ;
+                  if( -1 == arc)
+                    {
+                      TRACEN(k_t_congestion,"still-congested dir=%d",direction );
+                      TRACEN(k_t_general,"(<) still-congested" );
+                      instrument_flow(dma_tcp,k_queue_filled_propose_fifo) ;
+                      return -1 ; /* Queue not cleared */
+                    }
+                }
+              }
+            else
+              {
+                TRACEN(k_t_congestion,"(=)Dequeuing askb=NULL") ;
+                break ;
+              }
+
+          }
+
+      }
+
+    TRACEN(k_t_general,"(<) clear" );
+    return 0 ; /* Queue cleared */
+  }
+
+/* Timer callback: attempt to drain every per-direction pending-skb queue
+ * into the injection FIFOs.  If any queue could not be fully drained
+ * (bgp_dma_tcp_try_to_clear_queue returned -1), re-arm the timer so we
+ * retry on the next tick. */
+static void dma_tcp_frames_runway_check(unsigned long parm)
+  {
+    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+    int anything_queued = 0 ;
+    int dirn = 0 ;
+    TRACEN(k_t_congestion,"(>)");
+    while( dirn < k_injecting_directions )
+      {
+        anything_queued += bgp_dma_tcp_try_to_clear_queue(dma_tcp,dirn) ;
+        dirn += 1 ;
+      }
+    if( 0 != anything_queued )
+      {
+        mod_timer(&dma_tcp->runway_check_timer,jiffies+1) ; /* Redrive on the next timer tick */
+      }
+    TRACEN(k_t_congestion,"(<) anything_queued=%d",anything_queued);
+  }
+
+/* Take an skbuff bound for (x,y,z), and either put it in the software FIFO or queue it for when congestion abates.
+ * Large or fragmented skbs go via the skb-to-skb DMA path when that is compiled in.
+ * Always returns 0 (the skb is consumed: sent, queued, or dropped). */
+int bgp_dma_tcp_send_and_free_frames( struct sk_buff *skb )
+{
+  TRACEN(k_t_general,"(>)skb=%p data=%p length=%d", skb,skb->data, skb->len) ;
+  {
+    dma_tcp_t *dma_tcp = &dma_tcp_state ;
+    dma_tcp->tx_by_core[smp_processor_id() & 3] += 1 ; /* Stats on which core(s) are busy */
+#if defined(CONFIG_BGP_STATISTICS)
+    {
+      struct ethhdr *eth = (struct ethhdr *) (skb->data) ;
+      struct iphdr *iph=(struct iphdr *) (eth+1) ;
+      /* tot_len is network byte order; convert so the byte count is correct
+       * on any endianness (no-op on big-endian PowerPC) */
+      dma_tcp->bytes_sent += ntohs(iph->tot_len) ;
+    }
+#endif
+
+    if( 0 == skb_headlen(skb))
+      {
+        TRACEN(k_t_request,"(I) head length zero") ;
+      }
+
+#if defined(USE_SKB_TO_SKB)
+    /* Frames above the eager limit, or with page fragments, use DMA direct
+     * to the far-end skb rather than the software FIFO */
+    if( skb->len > dma_tcp->eager_limit || 0 != skb_shinfo(skb)->nr_frags )
+      {
+        bgp_dma_tcp_s_and_f_frames_dma(dma_tcp,skb) ;
+      }
+    else
+#endif
+      {
+        int rc = bgp_dma_tcp_s_and_f_frames(dma_tcp,skb,
+            /* x,y,z, */
+            0) ;
+        if( rc == -1)
+          {
+            /* Frame was queued because of congestion; redrive on the next timer tick */
+            mod_timer(&dma_tcp->runway_check_timer,jiffies+1) ;
+          }
+      }
+  }
+  TRACEN(k_t_general,"(<)");
+  return 0 ;
+}
+
+#if defined(ENABLE_LATENCY_TRACKING)
+
+/* Integer square root by bitwise binary search: returns floor(sqrt(x))
+ * for results that fit in 16 bits (i.e. any 32-bit x). */
+static unsigned int isqrt(unsigned int x)
+  {
+  unsigned int result = 0 ;
+  unsigned int bit ;
+  for( bit = 0x8000; bit != 0; bit >>= 1 )
+    {
+    unsigned int trial = result | bit ;
+    if( trial*trial <= x ) result = trial ;
+    }
+  return result ;
+  }
+#endif
+
+#if defined(TRACK_SEQUENCE)
+/* Dump (via TRACEN) the non-zero per-(node,core) send/receive sequence
+ * counters, prefixed with this node's own coordinates.  Slot layout is
+ * ((x*ysize + y)*zsize + z) << 2 | core; the <<2 assumes at most 4 injecting
+ * cores -- NOTE(review): confirm k_injecting_cores <= 4. */
+static void dma_tcp_frames_show_sequence(dma_tcp_t *dma_tcp)
+  {
+    unsigned int x ;
+    unsigned int y ;
+    unsigned int z ;
+    unsigned int core ;
+    unsigned int xsize = dma_tcp->extent.coordinate[0] ;
+    unsigned int ysize = dma_tcp->extent.coordinate[1] ;
+    unsigned int zsize = dma_tcp->extent.coordinate[2] ;
+    unsigned int myx = dma_tcp->location.coordinate[0] ;
+    unsigned int myy = dma_tcp->location.coordinate[1] ;
+    unsigned int myz = dma_tcp->location.coordinate[2] ;
+    for(x=0;x<xsize; x+=1 )
+      {
+        for( y = 0; y<ysize; y+=1)
+          {
+            for( z = 0 ; z<zsize; z+=1 )
+              {
+                unsigned int slot_base = x*(ysize*zsize) + y*zsize + z ;
+                for( core=0; core<k_injecting_cores; core+=1)
+                  {
+                    unsigned int slot = (slot_base << 2) | core ;
+                    unsigned int txcount = send_sequences[slot] ;
+                    unsigned int rxcount = receive_sequences[slot] ;
+                    /* Only report slots that have seen traffic */
+                    if( txcount || rxcount)
+                      {
+                        TRACEN(k_t_request,"( %d %d %d ) show_sequence( %d %d %d %d )=( %d %d )", myx, myy, myz, x,y,z,core, txcount,rxcount) ;
+                      }
+                  }
+              }
+          }
+      }
+  }
+#endif
+
+#if defined(ENABLE_PROGRESS_TRACKING)
+/* Report (via TRACEN) the age, in timebase ticks, of every receive skb still
+ * in flight in the reception demultiplexer -- one slot per (node,core), same
+ * slot layout as dma_tcp_frames_show_sequence.  Used to spot stalled
+ * transfers. */
+static void dma_tcp_frames_show_progress(dma_tcp_t *dma_tcp)
+  {
+    unsigned int x ;
+    unsigned int y ;
+    unsigned int z ;
+    unsigned int core ;
+    unsigned int xsize = dma_tcp->extent.coordinate[0] ;
+    unsigned int ysize = dma_tcp->extent.coordinate[1] ;
+    unsigned int zsize = dma_tcp->extent.coordinate[2] ;
+    unsigned int myx = dma_tcp->location.coordinate[0] ;
+    unsigned int myy = dma_tcp->location.coordinate[1] ;
+    unsigned int myz = dma_tcp->location.coordinate[2] ;
+    unsigned long long now=get_powerpc_tb() ;
+    TRACEN(k_t_entryexit,">") ;
+    for(x=0;x<xsize; x+=1 )
+      {
+        for( y = 0; y<ysize; y+=1)
+          {
+            for( z = 0 ; z<zsize; z+=1 )
+              {
+                unsigned int slot_base = x*(ysize*zsize) + y*zsize + z ;
+                for( core=0; core<k_injecting_cores; core+=1)
+                  {
+                    unsigned int slot = (slot_base << 2) | core ;
+                    /* A non-NULL slot means a transfer into this skb has started
+                     * but not completed; report how long it has been pending */
+                    if( get_rcv_skb(&dma_tcp->rcvdemux,slot))
+                      {
+                        unsigned long long timestamp=get_timestamp(&dma_tcp->rcvdemux,slot) ;
+                        unsigned long long age=now-timestamp ;
+                        TRACEN(k_t_request,"( %d %d %d ) age( %d %d %d %d )= 0x%08x%08x !!!", myx, myy, myz, x,y,z,core,(unsigned int)(age>>32),(unsigned int)age) ;
+                      }
+                  }
+              }
+          }
+      }
+    TRACEN(k_t_entryexit,"<") ;
+  }
+#endif
+
+/* Prepare the receive-side balancer: give every pending-skb class an empty
+ * queue and zero its count of outstanding reception counters. */
+void __init
+balancer_init(bgp_dma_balancer *balancer)
+{
+	int class_index;
+	for( class_index=0; class_index<k_pending_rcv_skb_classes; class_index+=1)
+	{
+		TRACEN(k_t_general,"balancer init[%d]",class_index) ;
+		skb_queue_head_init(&balancer->b[class_index].pending_rcv_skbs) ;
+		balancer->b[class_index].outstanding_counters=0 ;
+	}
+}
+
+/*
+ * We set up 32 software injection FIFOs. We arrange them in 4 groups of 8; the group number is chosen as a function of the
+ * destination node, For the group of 8, we use 6 FIFOs to control 'bulk data' nominally one for each outbound link (though
+ * adaptive routing may take a packet out of a different link when the time comes); 1 FIFO to control single-packet frames
+ * which are sent high-priority because they may be 'ack' frames which will enable more data to flow from a far-end node; and
+ * 1 FIFO to control 'accept' packets which are sent high-priority because a scarce local resource (a reception counter) has been
+ * allocated to the transfer and we would like it underway as soon as possible.
+ */
+
+/* One-time initialisation of the frames (software-FIFO) layer: timers,
+ * injection/reception buffering, the hardware injection FIFO group, and the
+ * reception-FIFO receive functions. */
+void __init
+dma_tcp_frames_init(dma_tcp_t *dma_tcp)
+  {
+    TRACEN(k_t_general,"sizeof(frame_injection_cb)=%d sizeof(DMA_PacketHeader_t)=%d sizeof(DMA_InjDescriptor_t)=%d",sizeof(frame_injection_cb),sizeof(DMA_PacketHeader_t),sizeof(DMA_InjDescriptor_t)) ;
+
+    if( k_async_free ) setup_timer(&dma_tcp->transmission_free_skb_timer,dma_tcp_frames_transmission_free_skb,0) ;
+    setup_timer(&dma_tcp->runway_check_timer,dma_tcp_frames_runway_check,0) ;
+    dma_tcp->rcv_checked_time = jiffies ;
+    dma_tcp->packets_received_count = 0 ;
+    allocate_idma(&dma_tcp->idma) ; /* Buffering for packets-style injection DMA */
+    allocate_rcv(&dma_tcp->rcvdemux,dma_tcp->node_count) ; /* Demultiplexing for packets-style reception */
+#if defined(USE_ADAPTIVE_ROUTING)
+    allocate_tx(&dma_tcp->tx_mux,dma_tcp->node_count) ; /* Multiplexing for adaptive-routing transmit */
+#endif
+#if defined(TRACK_SEQUENCE)
+    track_sequence_init(dma_tcp->node_count) ;
+#endif
+    init_demux_table(dma_tcp, dma_tcp->node_count) ;
+    /* Allocate injection FIFOs for 'packets' style access */
+    {
+      int core ;
+      int direction ;
+      for( core=0; core< k_injecting_cores; core += 1 )
+        {
+          for( direction=0; direction< k_injecting_directions; direction += 1 )
+            {
+              /* Default every FIFO to normal priority, non-local, identity id */
+              dma_tcp->injFifoFramesPri[ core*k_injecting_directions+direction ] = 0 ;
+              dma_tcp->injFifoFramesLoc[ core*k_injecting_directions+direction ] = 0 ;
+              dma_tcp->injFifoFramesIds[ core*k_injecting_directions+direction ] = core*k_injecting_directions+direction ;
+            }
+          /* Map bits select which hardware torus FIFO(s) each software FIFO
+           * may inject into; one deterministic link per bulk-data direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+0 ] = 0x80; /* Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+1 ] = 0x40; /* Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+2 ] = 0x20; /* Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+3 ] = 0x08; /* Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+4 ] = 0x04; /* Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+5 ] = 0x02; /* Set deterministic injection FIFO per direction */
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+6 ] = 0x11; /* Set 'high priority' FIFO for taxi channel */
+          /* NOTE(review): this marks the LAST direction slot high-priority;
+           * if k_injecting_directions is 8 that is slot 7 (the propose/accept
+           * channel, also set below under USE_SKB_TO_SKB) rather than slot 6
+           * (the taxi channel mapped to 0x11 above) -- confirm intended. */
+          dma_tcp->injFifoFramesPri[ core*k_injecting_directions+k_injecting_directions-1 ] = 1 ; // 'high priority' for taxi channel
+/* dma_tcp->injFifoFramesMap[ core*k_injecting_directions+6 ] = 0xee; // Set any FIFO for taxi channel */
+#if defined(USE_SKB_TO_SKB)
+          dma_tcp->injFifoFramesMap[ core*k_injecting_directions+7 ] = 0x11; /* Set 'high priority' FIFO for propose/accept channel */
+/* dma_tcp->injFifoFramesMap[ core*k_injecting_directions+7 ] = 0xee; // propose/accept channel can go in any fifo, but regular pri */
+          dma_tcp->injFifoFramesPri[ core*k_injecting_directions+7 ] = 1 ; // 'high priority' for propose/accept channel
+#endif
+        }
+    }
+    {
+      /* Hand the whole id/priority/locality/map configuration to the DMA SPI */
+      int ret = DMA_InjFifoGroupAllocate( k_InjectionFifoGroupFrames,
+          k_injecting_cores*k_injecting_directions, /* num inj fifos */
+          dma_tcp->injFifoFramesIds,
+          dma_tcp->injFifoFramesPri,
+          dma_tcp->injFifoFramesLoc,
+          dma_tcp->injFifoFramesMap,
+          NULL,
+          NULL,
+          NULL,
+          NULL,
+          NULL,
+          & dma_tcp->injFifoGroupFrames );
+
+      TRACEN(k_t_general,"(=)DMA_InjFifoGroupAllocate rc=%d", ret );
+    }
+
+    {
+      int core ;
+      int direction ;
+      for( core=0; core< k_injecting_cores; core += 1 )
+        {
+          for( direction=0; direction< k_injecting_directions; direction += 1 )
+            {
+              /* Point each hardware FIFO at its software buffer and remember
+               * the initial head so injection offsets can be computed later */
+              int ret = DMA_InjFifoInitById( &dma_tcp->injFifoGroupFrames,
+                  dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction],
+                  dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo,
+                  dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo, /* head */
+                  dma_tcp->idma.idma_core[core].idma_direction[direction].idma_fifo+1 /* end */
+                  );
+              dma_tcp->idma.idma_core[core].idma_direction[direction].fifo_initial_head =
+                (unsigned int) DMA_InjFifoGetHeadById( &dma_tcp->injFifoGroupFrames, dma_tcp->injFifoFramesIds[core*k_injecting_directions+direction]) ;
+              TRACEN(k_t_general,"(=)DMA_InjFifoInitById rc=%d initial_head=0x%08x", ret , dma_tcp->idma.idma_core[core].idma_direction[direction].fifo_initial_head);
+            }
+        }
+    }
+    /* register receive functions for the memfifo packets */
+    dma_tcp->proto_issue_frames_single=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDataSingleActor, dma_tcp, 0, 0);
+#if defined(USE_ADAPTIVE_ROUTING)
+    dma_tcp->proto_issue_frames_adaptive=DMA_RecFifoRegisterRecvFunction(issueInlineFrameDataAdaptiveActor, dma_tcp, 0, 0);
+#endif
+
+#if defined(USE_SKB_TO_SKB)
+    dma_tcp->proto_transfer_propose=DMA_RecFifoRegisterRecvFunction(issuePropActor, dma_tcp, 0, 0);
+    /* If we want to start up with everything flowing through the reception FIFO , do this by setting the 'eager limit' longer than the largest IP frame */
+    dma_tcp->eager_limit = k_force_eager_flow ? 10000000 : 1024 ; /* Frames smaller than this get sent through the FIFO rather than the DMA (set it above 65536 to run everything through receive FIFO) */
+    balancer_init(&dma_tcp->balancer) ;
+#endif
+    dma_tcp_diagnose_init(dma_tcp) ;
+    TRACEN(k_t_general,"(=)DMA_RecFifoRegisterRecvFunction proto_issue_frames_single=%d",
+        dma_tcp->proto_issue_frames_single);
+  }
diff --git a/drivers/net/bgp_torus/bgp_dma_tcp_quads.h b/drivers/net/bgp_torus/bgp_dma_tcp_quads.h
new file mode 100644
index 00000000000000..ecc4815a6641f8
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_dma_tcp_quads.h
@@ -0,0 +1,394 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Quadword ops for copying data, in particular torus-packet-sized
+ * (240 byte) sequences. Not currently used, but provided for
+ * reference.
+ *
+ *
+ ********************************************************************/
+#ifndef __BGP_DMA_TCP_QUADS_H__
+#define __BGP_DMA_TCP_QUADS_H__
+
+/* TODO: take away the use of FP regs, now that software FIFO frames are 'rare', so we can avoid FP-in-kernel */
+/* Drop 240 bytes of payload from regs into 'software FIFO' */
+/* Stores quads Q1..Q15 from FP registers fr1-fr15; the caller must already
+ * have populated those registers (e.g. via torus_frame_payload_load).
+ * NOTE(review): stfpdx is a quadword store -- payload is assumed to be
+ * 16-byte aligned; confirm all callers guarantee that. */
+static inline void torus_frame_payload_store(
+    void * payloadptr)
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+    torus_frame_payload *payload=payloadptr ;
+
+    TRACEN(k_t_detail, "torus_payload_store payload=%p",payload) ;
+    asm (
+        "li %[index1],16 \n\t"  /* Indexing values */
+        "stfpdx 1,0,%[payload] \n\t"  /* store F1 -> Q1 at (%[payload]) */
+        "li %[index2],32 \n\t"  /* Indexing values */
+        "stfpdx 2,%[index1],%[payload] \n\t"  /* store F2 -> Q2 */
+        "li %[index1],48 \n\t"  /* Indexing values */
+        "stfpdx 3,%[index2],%[payload] \n\t"  /* store F3 -> Q3 */
+        "li %[index2],64 \n\t"  /* Indexing values */
+        "stfpdx 4,%[index1],%[payload] \n\t"  /* store F4 -> Q4 */
+        "li %[index1],80 \n\t"  /* Indexing values */
+        "stfpdx 5,%[index2],%[payload] \n\t"  /* store F5 -> Q5 */
+        "li %[index2],96 \n\t"  /* Indexing values */
+        "stfpdx 6,%[index1],%[payload] \n\t"  /* store F6 -> Q6 */
+        "li %[index1],112 \n\t"  /* Indexing values */
+        "stfpdx 7,%[index2],%[payload] \n\t"  /* store F7 -> Q7 */
+        "li %[index2],128 \n\t"  /* Indexing values */
+        "stfpdx 8,%[index1],%[payload] \n\t"  /* store F8 -> Q8 */
+        "li %[index1],144 \n\t"  /* Indexing values */
+        "stfpdx 9,%[index2],%[payload] \n\t"  /* store F9 -> Q9 */
+        "li %[index2],160 \n\t"  /* Indexing values */
+        "stfpdx 10,%[index1],%[payload] \n\t"  /* store F10 -> Q10 */
+        "li %[index1],176 \n\t"  /* Indexing values */
+        "stfpdx 11,%[index2],%[payload] \n\t"  /* store F11 -> Q11 */
+        "li %[index2],192 \n\t"  /* Indexing values */
+        "stfpdx 12,%[index1],%[payload] \n\t"  /* store F12 -> Q12 */
+        "li %[index1],208 \n\t"  /* Indexing values */
+        "stfpdx 13,%[index2],%[payload] \n\t"  /* store F13 -> Q13 */
+        "li %[index2],224 \n\t"  /* Indexing values */
+        "stfpdx 14,%[index1],%[payload] \n\t"  /* store F14 -> Q14 */
+        "stfpdx 15,%[index2],%[payload] \n\t"  /* store F15 -> Q15 */
+        : /* outputs */
+        "=m" (*payload),
+        [index1] "=&b" (index1),
+        [index2] "=&b" (index2)
+        : /* Inputs */
+        [payload] "b" (payload) /* inputs */
+        : "fr0", "fr1", "fr2", /* Clobbers */
+        "fr3", "fr4", "fr5",
+        "fr6", "fr7", "fr8",
+        "fr9", "fr10", "fr11",
+        "fr12","fr13", "fr14", "fr15"
+        );
+  }
+
+/* Load 240 bytes of payload from memory into regs */
+/* Loads quads Q1..Q15 into FP registers fr1-fr15, to be written out later by
+ * torus_frame_payload_store.  NOTE(review): lfpdx is a quadword load --
+ * payload is assumed to be 16-byte aligned; confirm callers guarantee that. */
+static inline void torus_frame_payload_load(
+    void * payloadptr)
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+    torus_frame_payload *payload=payloadptr ;
+
+    TRACEN(k_t_detail, "torus_payload_load payload=%p",payload) ;
+    asm (
+        "li %[index1],16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[payload] \n\t"  /* F1=Q1 load from (%[payload]) */
+        "li %[index2],32 \n\t"  /* Indexing values */
+        "lfpdx 2,%[index1],%[payload] \n\t"  /* F2=Q2 load */
+        "li %[index1],48 \n\t"  /* Indexing values */
+        "lfpdx 3,%[index2],%[payload] \n\t"  /* F3=Q3 load */
+        "li %[index2],64 \n\t"  /* Indexing values */
+        "lfpdx 4,%[index1],%[payload] \n\t"  /* F4=Q4 load */
+        "li %[index1],80 \n\t"  /* Indexing values */
+        "lfpdx 5,%[index2],%[payload] \n\t"  /* F5=Q5 load */
+        "li %[index2],96 \n\t"  /* Indexing values */
+        "lfpdx 6,%[index1],%[payload] \n\t"  /* F6=Q6 load */
+        "li %[index1],112 \n\t"  /* Indexing values */
+        "lfpdx 7,%[index2],%[payload] \n\t"  /* F7=Q7 load */
+        "li %[index2],128 \n\t"  /* Indexing values */
+        "lfpdx 8,%[index1],%[payload] \n\t"  /* F8=Q8 load */
+        "li %[index1],144 \n\t"  /* Indexing values */
+        "lfpdx 9,%[index2],%[payload] \n\t"  /* F9=Q9 load */
+        "li %[index2],160 \n\t"  /* Indexing values */
+        "lfpdx 10,%[index1],%[payload] \n\t"  /* F10=Q10 load */
+        "li %[index1],176 \n\t"  /* Indexing values */
+        "lfpdx 11,%[index2],%[payload] \n\t"  /* F11=Q11 load */
+        "li %[index2],192 \n\t"  /* Indexing values */
+        "lfpdx 12,%[index1],%[payload] \n\t"  /* F12=Q12 load */
+        "li %[index1],208 \n\t"  /* Indexing values */
+        "lfpdx 13,%[index2],%[payload] \n\t"  /* F13=Q13 load */
+        "li %[index2],224 \n\t"  /* Indexing values */
+        "lfpdx 14,%[index1],%[payload] \n\t"  /* F14=Q14 load */
+        "lfpdx 15,%[index2],%[payload] \n\t"  /* F15=Q15 load */
+        : /* outputs */
+        "=m" (*payload),
+        [index1] "=&b" (index1),
+        [index2] "=&b" (index2)
+        : /* Inputs */
+        [payload] "b" (payload) /* inputs */
+        : "fr0", "fr1", "fr2", /* Clobbers */
+        "fr3", "fr4", "fr5",
+        "fr6", "fr7", "fr8",
+        "fr9", "fr10", "fr11",
+        "fr12","fr13", "fr14", "fr15"
+        );
+  }
+
+/* Copy one 240-byte torus payload: 15 quadword loads into fr1-fr15, then 15
+ * quadword stores.  Both pointers are assumed 16-byte aligned and
+ * non-overlapping.  Always returns 0. */
+static inline int torus_frame_payload_memcpy_base(
+    torus_frame_payload * target,
+    torus_frame_payload * source
+    )
+  {
+    unsigned int index1 ;
+    unsigned int index2 ;
+
+    TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ;
+    asm (
+        "li %[index1],16 \n\t"  /* Indexing values */
+        "lfpdx 1,0,%[source] \n\t"  /* F1=Q1 load from (%[source]) */
+        "li %[index2],32 \n\t"  /* Indexing values */
+        "lfpdx 2,%[index1],%[source] \n\t"  /* F2=Q2 load */
+        "li %[index1],48 \n\t"  /* Indexing values */
+        "lfpdx 3,%[index2],%[source] \n\t"  /* F3=Q3 load */
+        "li %[index2],64 \n\t"  /* Indexing values */
+        "lfpdx 4,%[index1],%[source] \n\t"  /* F4=Q4 load */
+        "li %[index1],80 \n\t"  /* Indexing values */
+        "lfpdx 5,%[index2],%[source] \n\t"  /* F5=Q5 load */
+        "li %[index2],96 \n\t"  /* Indexing values */
+        "lfpdx 6,%[index1],%[source] \n\t"  /* F6=Q6 load */
+        "li %[index1],112 \n\t"  /* Indexing values */
+        "lfpdx 7,%[index2],%[source] \n\t"  /* F7=Q7 load */
+        "li %[index2],128 \n\t"  /* Indexing values */
+        "lfpdx 8,%[index1],%[source] \n\t"  /* F8=Q8 load */
+        "li %[index1],144 \n\t"  /* Indexing values */
+        "lfpdx 9,%[index2],%[source] \n\t"  /* F9=Q9 load */
+        "li %[index2],160 \n\t"  /* Indexing values */
+        "lfpdx 10,%[index1],%[source] \n\t"  /* F10=Q10 load */
+        "li %[index1],176 \n\t"  /* Indexing values */
+        "lfpdx 11,%[index2],%[source] \n\t"  /* F11=Q11 load */
+        "li %[index2],192 \n\t"  /* Indexing values */
+        "lfpdx 12,%[index1],%[source] \n\t"  /* F12=Q12 load */
+        "li %[index1],208 \n\t"  /* Indexing values */
+        "lfpdx 13,%[index2],%[source] \n\t"  /* F13=Q13 load */
+        "li %[index2],224 \n\t"  /* Indexing values */
+        "lfpdx 14,%[index1],%[source] \n\t"  /* F14=Q14 load */
+        "lfpdx 15,%[index2],%[source] \n\t"  /* F15=Q15 load */
+        "li %[index1],16 \n\t"  /* Indexing values */
+        "stfpdx 1,0,%[target] \n\t"  /* store F1 -> Q1 at (%[target]) */
+        "li %[index2],32 \n\t"  /* Indexing values */
+        "stfpdx 2,%[index1],%[target] \n\t"  /* store F2 -> Q2 */
+        "li %[index1],48 \n\t"  /* Indexing values */
+        "stfpdx 3,%[index2],%[target] \n\t"  /* store F3 -> Q3 */
+        "li %[index2],64 \n\t"  /* Indexing values */
+        "stfpdx 4,%[index1],%[target] \n\t"  /* store F4 -> Q4 */
+        "li %[index1],80 \n\t"  /* Indexing values */
+        "stfpdx 5,%[index2],%[target] \n\t"  /* store F5 -> Q5 */
+        "li %[index2],96 \n\t"  /* Indexing values */
+        "stfpdx 6,%[index1],%[target] \n\t"  /* store F6 -> Q6 */
+        "li %[index1],112 \n\t"  /* Indexing values */
+        "stfpdx 7,%[index2],%[target] \n\t"  /* store F7 -> Q7 */
+        "li %[index2],128 \n\t"  /* Indexing values */
+        "stfpdx 8,%[index1],%[target] \n\t"  /* store F8 -> Q8 */
+        "li %[index1],144 \n\t"  /* Indexing values */
+        "stfpdx 9,%[index2],%[target] \n\t"  /* store F9 -> Q9 */
+        "li %[index2],160 \n\t"  /* Indexing values */
+        "stfpdx 10,%[index1],%[target] \n\t"  /* store F10 -> Q10 */
+        "li %[index1],176 \n\t"  /* Indexing values */
+        "stfpdx 11,%[index2],%[target] \n\t"  /* store F11 -> Q11 */
+        "li %[index2],192 \n\t"  /* Indexing values */
+        "stfpdx 12,%[index1],%[target] \n\t"  /* store F12 -> Q12 */
+        "li %[index1],208 \n\t"  /* Indexing values */
+        "stfpdx 13,%[index2],%[target] \n\t"  /* store F13 -> Q13 */
+        "li %[index2],224 \n\t"  /* Indexing values */
+        "stfpdx 14,%[index1],%[target] \n\t"  /* store F14 -> Q14 */
+        "stfpdx 15,%[index2],%[target] \n\t"  /* store F15 -> Q15 */
+        : /* outputs */
+        "=m" (*target),
+        [index1] "=&b" (index1),
+        [index2] "=&b" (index2)
+        : /* Inputs */
+        [source] "b" (source), /* inputs */
+        [target] "b" (target) /* inputs */
+        : "fr0", "fr1", "fr2", /* Clobbers */
+        "fr3", "fr4", "fr5",
+        "fr6", "fr7", "fr8",
+        "fr9", "fr10", "fr11",
+        "fr12","fr13", "fr14", "fr15"
+        );
+    return 0 ;
+  }
+/* Emit asm to load FP register 'Reg' with the quadword at byte 'Offset' from
+ * the base-register operand 'Name'; uses %[index] as scratch for the offset. */
+#define loadreg(Reg,Name,Offset) \
+  "li %[index]," #Offset " \n\t"  \
+  "lfpdx " #Reg ",%[index],%[" #Name "] \n\t"
+
+/* Emit asm to store FP register 'Reg' to the quadword at byte 'Offset' from
+ * the base-register operand 'Name'; uses %[index] as scratch for the offset. */
+#define savereg(Reg,Name,Offset) \
+  "li %[index]," #Offset " \n\t"  \
+  "stfpdx " #Reg ",%[index],%[" #Name "] \n\t"
+
+
+/* Copy one 240-byte torus payload through FP regs fr0-fr14, then issue three
+ * 'speculative' loads at source offsets 0xf0, 0x110 and 0x130 -- i.e. BEYOND
+ * the 240-byte source payload -- to warm the registers/cache for a following
+ * copy.  NOTE(review): this is safe only when the source payload sits inside
+ * a larger contiguous buffer of consecutive payloads; confirm callers never
+ * pass the final payload of a buffer, otherwise this reads out of bounds.
+ * Always returns 0. */
+static inline int torus_frame_payload_memcpy(
+    torus_frame_payload * target,
+    torus_frame_payload * source
+    )
+  {
+    unsigned int index ;
+
+    TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ;
+    asm (
+        loadreg(0,source,0x00)
+        loadreg(1,source,0x10)
+        loadreg(2,source,0x20)
+        loadreg(3,source,0x30)
+        loadreg(4,source,0x40)
+        loadreg(5,source,0x50)
+        loadreg(6,source,0x60)
+        loadreg(7,source,0x70)
+        loadreg(8,source,0x80)
+        loadreg(9,source,0x90)
+        loadreg(10,source,0xa0)
+        loadreg(11,source,0xb0)
+        loadreg(12,source,0xc0)
+        loadreg(13,source,0xd0)
+        loadreg(14,source,0xe0)
+        savereg(0,target,0x00)
+        savereg(1,target,0x10)
+        savereg(2,target,0x20)
+        savereg(3,target,0x30)
+        savereg(4,target,0x40)
+        savereg(5,target,0x50)
+        savereg(6,target,0x60)
+        savereg(7,target,0x70)
+        savereg(8,target,0x80)
+        savereg(9,target,0x90)
+        savereg(10,target,0xa0)
+        savereg(11,target,0xb0)
+        savereg(12,target,0xc0)
+        loadreg(0,source,0xf0) /* Speculate that we will need this soon */
+        savereg(13,target,0xd0)
+        loadreg(1,source,0x110) /* Speculate that we will need this soon */
+        savereg(14,target,0xe0)
+        loadreg(2,source,0x130) /* Speculate that we will need this soon */
+
+        : /* outputs */
+        "=m" (*target),
+        [index] "=&b" (index)
+        : /* Inputs */
+        [source] "b" (source), /* inputs */
+        [target] "b" (target) /* inputs */
+        : "fr0", "fr1", "fr2", /* Clobbers */
+        "fr3", "fr4", "fr5",
+        "fr6", "fr7", "fr8",
+        "fr9", "fr10", "fr11",
+        "fr12","fr13", "fr14"
+        );
+    return 0 ;
+  }
+
+/* Copy one 240-byte torus payload (15 quads, offsets 0x00-0xe0) through FP
+ * regs fr0-fr14, with loads and stores interleaved to overlap load latency
+ * with store issue.  No speculative over-read, unlike
+ * torus_frame_payload_memcpy.  Always returns 0. */
+static inline int torus_frame_payload_memcpy_try1(
+    torus_frame_payload * target,
+    torus_frame_payload * source
+    )
+  {
+    unsigned int index ;
+
+    TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ;
+    asm (
+        loadreg(0,source,0x00)
+        loadreg(2,source,0x20)
+        loadreg(4,source,0x40)
+        loadreg(1,source,0x10)
+        savereg(0,target,0x00)
+        loadreg(6,source,0x60)
+        savereg(2,target,0x20)
+        loadreg(3,source,0x30)
+        savereg(4,target,0x40)
+        loadreg(8,source,0x80)
+        savereg(1,target,0x10)
+        loadreg(5,source,0x50)
+        savereg(6,target,0x60)
+        loadreg(10,source,0xa0)
+        savereg(3,target,0x30)
+        loadreg(7,source,0x70)
+        savereg(8,target,0x80)
+        loadreg(12,source,0xc0)
+        savereg(5,target,0x50)
+        loadreg(9,source,0x90)
+        savereg(10,target,0xa0)
+        loadreg(14,source,0xe0)
+        savereg(7,target,0x70)
+        loadreg(11,source,0xb0)
+        savereg(12,target,0xc0)
+        loadreg(13,source,0xd0)
+        savereg(9,target,0x90)
+        savereg(14,target,0xe0)
+        savereg(11,target,0xb0)
+        savereg(13,target,0xd0)
+
+        : /* outputs */
+        "=m" (*target),
+        [index] "=&b" (index)
+        : /* Inputs */
+        [source] "b" (source), /* inputs */
+        [target] "b" (target) /* inputs */
+        : "fr0", "fr1", "fr2", /* Clobbers */
+        "fr3", "fr4", "fr5",
+        "fr6", "fr7", "fr8",
+        "fr9", "fr10", "fr11",
+        "fr12","fr13", "fr14"
+        );
+    return 0 ;
+  }
+
+/* Copy one 240-byte torus payload through FP regs fr0-fr14.  Identical to
+ * torus_frame_payload_memcpy_try1 except for the order of the first few
+ * loads (0,1,2,4 here versus 0,2,4,1 there) -- an alternative scheduling
+ * experiment.  Always returns 0. */
+static inline int torus_frame_payload_memcpy_try2(
+    torus_frame_payload * target,
+    torus_frame_payload * source
+    )
+  {
+    unsigned int index ;
+
+    TRACEN(k_t_detail, "torus_payload_memcpy target=%p source=%p",target,source) ;
+    asm (
+        loadreg(0,source,0x00)
+        loadreg(1,source,0x10)
+        loadreg(2,source,0x20)
+        loadreg(4,source,0x40)
+        savereg(0,target,0x00)
+        loadreg(6,source,0x60)
+        savereg(2,target,0x20)
+        loadreg(3,source,0x30)
+        savereg(4,target,0x40)
+        loadreg(8,source,0x80)
+        savereg(1,target,0x10)
+        loadreg(5,source,0x50)
+        savereg(6,target,0x60)
+        loadreg(10,source,0xa0)
+        savereg(3,target,0x30)
+        loadreg(7,source,0x70)
+        savereg(8,target,0x80)
+        loadreg(12,source,0xc0)
+        savereg(5,target,0x50)
+        loadreg(9,source,0x90)
+        savereg(10,target,0xa0)
+        loadreg(14,source,0xe0)
+        savereg(7,target,0x70)
+        loadreg(11,source,0xb0)
+        savereg(12,target,0xc0)
+        loadreg(13,source,0xd0)
+        savereg(9,target,0x90)
+        savereg(14,target,0xe0)
+        savereg(11,target,0xb0)
+        savereg(13,target,0xd0)
+
+        : /* outputs */
+        "=m" (*target),
+        [index] "=&b" (index)
+        : /* Inputs */
+        [source] "b" (source), /* inputs */
+        [target] "b" (target) /* inputs */
+        : "fr0", "fr1", "fr2", /* Clobbers */
+        "fr3", "fr4", "fr5",
+        "fr6", "fr7", "fr8",
+        "fr9", "fr10", "fr11",
+        "fr12","fr13", "fr14"
+        );
+    return 0 ;
+  }
+#endif
diff --git a/drivers/net/bgp_torus/bgp_fpu_memcpy.c b/drivers/net/bgp_torus/bgp_fpu_memcpy.c
new file mode 100644
index 00000000000000..8b60a213e7c5cf
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_fpu_memcpy.c
@@ -0,0 +1,825 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ *
+ * Description: Blue Gene/P low-level driver for copy_tofrom_user thorough the
+ * parallel floating point unit
+ *
+ *
+ *
+ ********************************************************************/
+#define REQUIRES_DUMPMEM
+
+#include <linux/version.h>
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/highmem.h>
+#include <linux/mman.h>
+#include <linux/syscalls.h>
+#include <linux/pagemap.h>
+
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/time.h>
+#include <asm/bitops.h>
+#include <asm/time.h>
+
+#include "../bgp_network/bgp_net_traceflags.h"
+#include <common/bgp_bitnumbers.h>
+#include "bgp_bic_diagnosis.h"
+#include "../bgp_network/bgdiagnose.h"
+#include "../bgp_network/450_tlb.h"
+/* Can drop bits out of COMPILED_TRACEMASK if we want to selectively compile out trace */
+#define COMPILED_TRACEMASK (0xffffffff)
+/* #define COMPILED_TRACEMASK (k_t_error) */
+
+#include <linux/KernelFxLog.h>
+
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+extern int bgp_dma_tcp_tracemask ;
+#define TRACEN(i,x...) KernelFxLog(bgp_dma_tcp_tracemask & (COMPILED_TRACEMASK & (i)),x)
+#else
+#define TRACEN(i,x...)
+#endif
+
+#include "bgp_memcpy.h"
+
+/*
+ * sysctl table exposing the copy-acceleration tunables (one entry per
+ * field of bgp_memcpy_control, each a plain integer handled by
+ * proc_dointvec).  "statistics" additionally exposes the whole
+ * bgp_dma_memcpy_statistics counter array.  The table is terminated
+ * by the zeroed sentinel entry, and is registered under the
+ * "bgp/copy" path by bgp_fpu_register_memcpy_sysctl().
+ */
+struct ctl_table bgp_memcpy_table[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "use_dma",
+ .data = &bgp_memcpy_control.use_dma,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "verify_fpu",
+ .data = &bgp_memcpy_control.verify_fpu,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "verify_dma",
+ .data = &bgp_memcpy_control.verify_dma,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "use_fpu",
+ .data = &bgp_memcpy_control.use_fpu,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "dma_threshold",
+ .data = &bgp_memcpy_control.dma_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "fpu_threshold",
+ .data = &bgp_memcpy_control.fpu_threshold,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "faults_until_disable",
+ .data = &bgp_memcpy_control.faults_until_disable,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "cycles_per_packet",
+ .data = &bgp_memcpy_control.cycles_per_packet,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ } ,
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "rate_observe_report_count",
+ .data = &bgp_memcpy_control.rate_observe_report_count,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ } ,
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "handle_pagecrossing",
+ .data = &bgp_memcpy_control.handle_pagecrossing,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ } ,
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "fpu_handle_pagecrossing_read",
+ .data = &bgp_memcpy_control.fpu_handle_pagecrossing_read,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ } ,
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "fpu_handle_pagecrossing_write",
+ .data = &bgp_memcpy_control.fpu_handle_pagecrossing_write,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ } ,
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "mask",
+ .data = &bgp_memcpy_control.mask,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ } ,
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "assist_active",
+ .data = &bgp_memcpy_control.assist_active,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ } ,
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "statistics",
+ .data = &bgp_dma_memcpy_statistics,
+ .maxlen = k_copy_statistics*sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ } ,
+ { 0 },
+} ;
+
+/* sysctl directory path: the tunables above appear under /proc/sys/bgp/copy/ */
+static struct ctl_path memcpy_ctl_path[] = {
+	{ .procname = "bgp", .ctl_name = 0, },
+	{ .procname = "copy", .ctl_name = 0, },
+	{ },
+};
+/* Boot-time defaults for the copy-acceleration tunables: DMA path off,
+ * FPU path on for copies of 512 bytes or more, verification off, and
+ * one faulting copy is enough to disable acceleration.  All fields are
+ * adjustable at runtime through the sysctl table above. */
+bgp_memcpy_control_t bgp_memcpy_control =
+ {
+   .use_dma = 0 ,
+   .use_fpu = 1 ,
+   .dma_threshold = 10000 ,
+   .fpu_threshold = 512 ,
+   .verify_dma = 0 ,
+   .verify_fpu = 0 ,
+   .cycles_per_packet = 20 ,
+   .rate_observe_report_count = 0xffffffff ,
+   .faults_until_disable = 1 ,
+   .handle_pagecrossing = 1 ,
+   .fpu_handle_pagecrossing_read = 0 ,
+   .fpu_handle_pagecrossing_write = 0 ,
+   .mask = 0 ,
+   .assist_active = 0
+ };
+
+/* Event counters indexed by the k_copy_* enum (bgp_memcpy.h); exposed
+ * read/write through the "statistics" sysctl entry. */
+unsigned int bgp_dma_memcpy_statistics[k_copy_statistics] ;
+
+
+/* Permanently disable the FPU-assisted ("DH", double-hummer) copy path
+ * -- called after too many faulting copies -- and count the event. */
+static void cause_fallback(void)
+{
+ TRACEN(k_t_request,"Turning off DH memcpy") ;
+ bgp_memcpy_control.use_fpu = 0 ;
+ dma_memcpy_statistic(k_copy_cause_fallback) ;
+}
+/* Page and alignment geometry used by the copy path */
+enum {
+ k_page_shift = PAGE_SHIFT ,
+ k_page_size = 1 << k_page_shift ,
+ k_page_offset_mask = k_page_size-1 ,
+ k_fpu_alignment = 16 , /* bytes moved by one quad load/store */
+ k_fpu_align_mask = k_fpu_alignment - 1
+};
+
+/* Whether to run the TLB diagnosis path when a user page probe fails */
+enum {
+ k_diag_not_mapped=0
+/* k_diagnose=1 */
+};
+
+/* Compile-time switches for the accelerated copy machinery */
+enum {
+ k_exploit_doublehummer = 1, /* use the FPU quad-copy at all */
+ k_verify_doublehummer = 1, /* allow runtime verification (gated by sysctl verify_fpu) */
+ k_fixup_faulty_memcpy=1, /* repair miscompares found by verification */
+ k_premark=0 ,  /* pre-fill target with 0x11 before DMA-verified copies */
+ k_map_write_check=0 , /* probe destination pages before FPU copy */
+ k_map_read_check=0 , /* probe source pages before FPU copy */
+ k_disable_after_too_many_faults=1 ,
+ k_inhibit_crosspage_write = 1 , // Set this if you want to not handle writes which cross a user-space page boundary
+ k_inhibit_crosspage_read = 1  // Set this if you want to not handle reads which cross a user-space page boundary
+};
+/*
+ * Diagnose a copy that failed verification: log every mismatching word
+ * and trailing byte, optionally repair the destination in place
+ * (k_fixup_faulty_memcpy), and after faults_until_disable such events
+ * permanently fall back to the ordinary copy path via cause_fallback().
+ *
+ * Fix vs. original: the tail check read dc[size-3..size-1]
+ * unconditionally, which indexes before the buffer for size < 3 and
+ * re-checked already-verified bytes otherwise; it now walks exactly
+ * the 0-3 bytes left over after the whole-word loop.
+ */
+static void report_faulty_memcpy(void * dest, const void * src, unsigned long size)
+{
+ unsigned int * di = (unsigned int *) dest ;
+ const unsigned int * si = (const unsigned int *) src ;
+ unsigned char * dc = (unsigned char *) (dest) ;
+ const unsigned char * sc = (const unsigned char *) (src) ;
+ unsigned long wordcount = size/sizeof(unsigned int) ;
+ unsigned long x ;
+ unsigned int faultwordcount = 0 ;
+ if( k_disable_after_too_many_faults)
+ {
+ int faults_to_go=bgp_memcpy_control.faults_until_disable-1 ;
+ if( faults_to_go <= 0 )
+ {
+ cause_fallback() ;
+ }
+ else
+ {
+ bgp_memcpy_control.faults_until_disable=faults_to_go ;
+ }
+ }
+ dma_memcpy_statistic(k_copy_verify_miscompares) ;
+ TRACEN(k_t_error,"dest=%p src=%p size=0x%08lx",dest,src,size) ;
+ for(x=0;x<wordcount;x+=1)
+ {
+ if( di[x] != si[x] )
+ {
+ TRACEN(k_t_error,"(E) x=0x%08lx di+x=%p si+x=%p di[x]=0x%08x si[x]=0x%08x",
+ x,di+x,si+x,di[x],si[x]) ;
+ if( k_fixup_faulty_memcpy) di[x]=si[x] ;
+ faultwordcount += 1 ;
+ }
+ }
+ /* Bytes (0-3 of them) after the last whole word */
+ for(x=wordcount*sizeof(unsigned int);x<size;x+=1)
+ {
+ if( dc[x] != sc[x])
+ {
+ TRACEN(k_t_error,"(E) x=0x%08lx dc+x=%p sc+x=%p dc[x]=0x%02x sc[x]=0x%02x",
+ x,dc+x,sc+x,dc[x],sc[x]) ;
+ if( k_fixup_faulty_memcpy) dc[x]=sc[x] ;
+ }
+ }
+ TRACEN(k_t_error,"%d/%ld words incorrectly copied",faultwordcount,size/sizeof(unsigned int)) ;
+
+}
+/* Check that a 'memcpy' was accurately done ... */
+/* Check that a 'memcpy' was accurately done: XOR-accumulate the
+ * difference over all whole words plus any trailing bytes, and hand
+ * any mismatch to report_faulty_memcpy().
+ *
+ * Fix vs. original: q was seeded from di[0]/si[0] and the last three
+ * bytes were read unconditionally, both of which index out of bounds
+ * for sizes under a word; all accesses are now bounded by 'size'. */
+static void verify_memcpy(void * dest, const void * src, unsigned long size)
+{
+ unsigned int * di = (unsigned int *) dest ;
+ const unsigned int * si = (const unsigned int *) src ;
+ unsigned char * dc = (unsigned char *) (dest) ;
+ const unsigned char * sc = (const unsigned char *) (src) ;
+ unsigned long wordcount = size/sizeof(unsigned int) ;
+ unsigned int q = 0 ;
+ unsigned long x ;
+ dma_memcpy_statistic(k_copy_verify_attempts) ;
+ TRACEN(k_t_fpucopy,"dest=%p src=%p size=0x%08lx",dest,src,size) ;
+ for(x=0;x<wordcount;x+=1)
+ {
+ q |= di[x] ^ si[x] ;
+ }
+ /* Trailing bytes after the last whole word */
+ for(x=wordcount*sizeof(unsigned int);x<size;x+=1)
+ {
+ q |= dc[x] ^ sc[x] ;
+ }
+ if(q) report_faulty_memcpy(dest,src,size) ;
+}
+
+/* 128-byte unit moved per iteration of the unrolled FPU copy loop */
+typedef struct { unsigned char c[128] ; } miniblock ;
+
+/* newline separator used when pasting multi-line asm strings together */
+#define nl "\n"
+/* Returns 0 for a good copy, 1 if an exception (unmapped storage) occurred */
+/*
+ * Core unrolled copy: streams 128-byte "miniblocks" from 'from' to
+ * 'to' with the 440 FP2 quad instructions (lfpdx/stfpdx move 16 bytes
+ * through one FPU register).  CTR is loaded with 'count'; a prologue
+ * primes fr0-fr7 for the first block, the bdnz loop copies the middle
+ * blocks with loads and stores interleaved across two block offsets to
+ * hide latency, and an epilogue drains the last block -- callers pass
+ * miniblock_count-1, so the routine appears to move count+1 blocks in
+ * total (TODO confirm against the prologue/epilogue structure).
+ * Every lfpdx/stfpdx carries a numeric label listed in __ex_table, so
+ * a fault on an unmapped user address lands at label 2: and the
+ * function returns 1 instead of oopsing; the caller falls back to the
+ * ordinary copy.
+ * NOTE(review): presumably requires 16-byte-aligned pointers, the FPU
+ * enabled, and count >= 1 -- confirm at call sites.
+ */
+static int doublehummer_copy_unroll(void *to, const void *from, int count)
+{
+ int x1=0x10 ;
+ int x2=0x20 ;
+ int x3=0x30 ;
+ int x4=0x40 ;
+ int x5=0x50 ;
+ int x6=0x60 ;
+ int x7=0x70 ;
+ int x8=0x80 ;
+ int xa=0xa0 ;
+ int xc=0xc0 ;
+ int xe=0xe0 ;
+ int rc ;
+ asm volatile (
+   "mtctr %[count]" nl
+   "100: lfpdx 0,0,%[src]" nl
+   "101: lfpdx 2,%[index2],%[src]" nl
+   "102: lfpdx 4,%[index4],%[src]" nl
+   "103: lfpdx 6,%[index6],%[src]" nl
+   "104: lfpdx 1,%[index1],%[src]" nl
+   "105: lfpdx 3,%[index3],%[src]" nl
+   "106: lfpdx 5,%[index5],%[src]" nl
+   "107: lfpdx 7,%[index7],%[src]" nl
+   "108: stfpdx 0,0 ,%[dst]" nl
+   "109: lfpdx 0,%[index8],%[src]" nl
+   "110: stfpdx 2,%[index2],%[dst]" nl
+   "111: lfpdx 2,%[indexa],%[src]" nl
+   "112: stfpdx 4,%[index4],%[dst]" nl
+   "113: lfpdx 4,%[indexc],%[src]" nl
+   "114: stfpdx 6,%[index6],%[dst]" nl
+   "115: lfpdx 6,%[indexe],%[src]" nl
+   "bdz 1f" nl
+
+   "0:" nl
+   "addi %[src],%[src],128" nl
+
+   "116: stfpdx 1,%[index1],%[dst]" nl
+   "117: lfpdx 1,%[index1],%[src]" nl
+   "118: stfpdx 0,%[index8],%[dst]" nl
+   "119: lfpdx 0,%[index8],%[src]" nl
+
+   "120: stfpdx 3,%[index3],%[dst]" nl
+   "121: lfpdx 3,%[index3],%[src]" nl
+   "122: stfpdx 2,%[indexa],%[dst]" nl
+   "123: lfpdx 2,%[indexa],%[src]" nl
+
+   "124: stfpdx 5,%[index5],%[dst]" nl
+   "125: lfpdx 5,%[index5],%[src]" nl
+   "126: stfpdx 4,%[indexc],%[dst]" nl
+   "127: lfpdx 4,%[indexc],%[src]" nl
+
+   "128: stfpdx 7,%[index7],%[dst]" nl
+   "129: lfpdx 7,%[index7],%[src]" nl
+   "130: stfpdx 6,%[indexe],%[dst]" nl
+   "addi %[dst],%[dst],128" nl
+   "131: lfpdx 6,%[indexe],%[src]" nl
+
+   "bdnz 0b" nl
+
+
+   "1:" nl
+   "addi %[src],%[src],128" nl
+
+   "132: stfpdx 1,%[index1],%[dst]" nl
+   "133: lfpdx 1,%[index1],%[src]" nl
+   "134: stfpdx 0,%[index8],%[dst]" nl
+
+   "135: stfpdx 3,%[index3],%[dst]" nl
+   "136: lfpdx 3,%[index3],%[src]" nl
+   "137: stfpdx 2,%[indexa],%[dst]" nl
+
+   "138: stfpdx 5,%[index5],%[dst]" nl
+   "139: lfpdx 5,%[index5],%[src]" nl
+   "140: stfpdx 4,%[indexc],%[dst]" nl
+
+   "141: stfpdx 7,%[index7],%[dst]" nl
+   "142: lfpdx 7,%[index7],%[src]" nl
+   "143: stfpdx 6,%[indexe],%[dst]" nl
+
+   "addi %[dst],%[dst],128" nl
+   "144: stfpdx 1,%[index1],%[dst]" nl
+   "145: stfpdx 3,%[index3],%[dst]" nl
+   "146: stfpdx 5,%[index5],%[dst]" nl
+   "147: stfpdx 7,%[index7],%[dst]" nl
+/* Following section needed to handle exceptions (user code passing addresses which SEGV) */
+   "li %[rc],0" nl
+   "b 3f" nl
+
+   "2:" nl
+   "li %[rc],1" nl
+   "3:" nl
+   ".section __ex_table,\"a\"" nl
+
+   ".align 2" nl
+   ".long 100b,2b" nl
+   ".long 101b,2b" nl
+   ".long 102b,2b" nl
+   ".long 103b,2b" nl
+   ".long 104b,2b" nl
+   ".long 105b,2b" nl
+   ".long 106b,2b" nl
+   ".long 107b,2b" nl
+   ".long 108b,2b" nl
+   ".long 109b,2b" nl
+   ".long 110b,2b" nl
+   ".long 111b,2b" nl
+   ".long 112b,2b" nl
+   ".long 113b,2b" nl
+   ".long 114b,2b" nl
+   ".long 115b,2b" nl
+   ".long 116b,2b" nl
+   ".long 117b,2b" nl
+   ".long 118b,2b" nl
+   ".long 119b,2b" nl
+   ".long 120b,2b" nl
+   ".long 121b,2b" nl
+   ".long 122b,2b" nl
+   ".long 123b,2b" nl
+   ".long 124b,2b" nl
+   ".long 125b,2b" nl
+   ".long 126b,2b" nl
+   ".long 127b,2b" nl
+   ".long 128b,2b" nl
+   ".long 129b,2b" nl
+   ".long 130b,2b" nl
+   ".long 131b,2b" nl
+   ".long 132b,2b" nl
+   ".long 133b,2b" nl
+   ".long 134b,2b" nl
+   ".long 135b,2b" nl
+   ".long 136b,2b" nl
+   ".long 137b,2b" nl
+   ".long 138b,2b" nl
+   ".long 139b,2b" nl
+   ".long 140b,2b" nl
+   ".long 141b,2b" nl
+   ".long 142b,2b" nl
+   ".long 143b,2b" nl
+   ".long 144b,2b" nl
+   ".long 145b,2b" nl
+   ".long 146b,2b" nl
+   ".long 147b,2b" nl
+   ".text" nl
+
+   : /* Outputs */
+     [rc] "=b" (rc)
+   : /* Inputs */
+     [dst] "b" (to),
+     [src] "b" (from),
+     [count] "r" (count),
+     [index1] "b" (x1),
+     [index2] "b" (x2),
+     [index3] "b" (x3),
+     [index4] "b" (x4),
+     [index5] "b" (x5),
+     [index6] "b" (x6),
+     [index7] "b" (x7),
+     [index8] "b" (x8),
+     [indexa] "b" (xa),
+     [indexc] "b" (xc),
+     [indexe] "b" (xe)
+   : /* Clobbers */
+     "memory",
+     "fr0","fr1","fr2","fr3",
+     "fr4","fr5","fr6","fr7"
+ ) ;
+
+ return rc ;
+}
+/* Fill 'count' 16-byte quads at 'dest' with the value pair (*v0,*v1):
+ * lfdx loads *v0 into fr0 and lfsdx loads *v1 into the secondary half
+ * of the pair (NOTE(review): lfsdx semantics presumed from the FP2
+ * "double hummer" ISA -- confirm), then stfpdx stores the quad 'count'
+ * times.  Used by clear_pages() with v0 == v1 == 0.0 to zero memory.
+ * Requires 16-byte-aligned 'dest' and the FPU enabled. */
+static void doublehummer_store_quads(void *dest, int count, const double *v0, const double *v1)
+{
+ asm volatile (
+   "mtctr %[count]" nl
+   "lfdx 0,0,%[v0]" nl
+   "lfsdx 0,0,%[v1]" nl
+   "0: stfpdx 0,0,%[dest]" nl
+   "addi %[dest],%[dest],16" nl
+   "bdnz 0b" nl
+   : /* Outputs */
+   : /* Inputs */
+     [dest] "b" (dest),
+     [v0] "b" (v0),
+     [v1] "b" (v1),
+     [count] "r" (count)
+   : /* Clobbers */
+     "memory",
+     "fr0"
+ ) ;
+
+}
+
+/* Try a 'doublehummer' memcpy, return 0 if we could and 1 if we couldn't */
+/*
+ * Strategy: the FPU copy needs source and destination to share the
+ * same offset within a 16-byte quad.  If they do, copy 0-15 leading
+ * bytes with the ordinary __real__copy_tofrom_user to reach quad
+ * alignment, stream whole 128-byte miniblocks through
+ * doublehummer_copy_unroll with interrupts masked, then copy the tail
+ * bytes the same ordinary way.  Any fault in any stage returns 1 so
+ * the caller falls back to the unaccelerated copy.
+ * NOTE(review): if size - precopy_size < sizeof(miniblock),
+ * miniblock_count is 0 and miniblock_count-1 is passed to the unroll
+ * routine; callers gate on fpu_threshold (default 512) which avoids
+ * this, but the threshold is a runtime sysctl -- confirm it cannot be
+ * lowered below ~144.
+ */
+static int doublehummer_memcpy(void * dest, const void * src, unsigned long size)
+{
+ if( k_exploit_doublehummer)
+ {
+ unsigned int di = (unsigned int) dest ;
+ unsigned int si = (unsigned int) src ;
+ unsigned int mutual_alignment = (di - si) & k_fpu_align_mask ;
+ unsigned int source_alignment = si & k_fpu_align_mask ;
+ unsigned int precopy_size = source_alignment ? (k_fpu_alignment - source_alignment) : 0 ;
+ unsigned int miniblock_di = di + precopy_size ;
+ unsigned int miniblock_si =si + precopy_size ;
+ unsigned int miniblock_size = size - precopy_size ;
+ unsigned int miniblock_count=miniblock_size/sizeof(miniblock) ;
+ unsigned int size_floor=miniblock_count*sizeof(miniblock) ;
+ unsigned int size_tail = size - size_floor - precopy_size ;
+ unsigned long flags ;
+ int rc ;
+ if( mutual_alignment )
+ {
+ dma_memcpy_statistic(k_copy_unaligned_rejects) ;
+ return 1 ; // Alignment between source and destination not good enough
+ }
+/* The source and dest are mutually aligned. Do we need a 1-15 byte pre-copy to get to quad alignment ? */
+ if( precopy_size )
+ {
+ rc = __real__copy_tofrom_user(dest, src, precopy_size) ;
+ if(rc)
+ {
+ dma_memcpy_statistic(k_precopy_segv_trap) ;
+ return 1 ;
+ }
+/* memcpy(dest,src,precopy_size) ; */
+ }
+ enable_kernel_fp() ;
+
+/* The copy should work with interrupts enabled, but whenever I tried it there were occasional errors in copying. */
+/* TODO: Diagnose why, fix, and run the copy without disabling. Same for the 'page copy' and 'page clear later */
+ local_irq_save(flags) ;
+ rc = doublehummer_copy_unroll((void *)miniblock_di,(void *)miniblock_si,miniblock_count-1) ;
+ local_irq_restore(flags) ;
+ if( rc )
+ {
+ dma_memcpy_statistic(k_copy_segv_trap) ;
+ return 1 ;
+ }
+
+ if( size_tail )
+ {
+ /* TODO: Fix up what happens if this causes a 'segv' */
+ rc = __real__copy_tofrom_user((void *)(miniblock_di+size_floor), (void *)(miniblock_si+size_floor), size_tail) ;
+ if(rc)
+ {
+ dma_memcpy_statistic(k_postcopy_segv_trap) ;
+ return 1 ;
+ }
+/* memcpy((void *)(miniblock_di+size_floor),(void *)(miniblock_si+size_floor),size_tail) ; */
+ }
+ if( k_verify_doublehummer && bgp_memcpy_control.verify_fpu)
+ {
+ verify_memcpy(dest,src,size) ;
+ }
+ return 0 ;
+ }
+ else
+ {
+ return 1 ;
+ }
+}
+
+/* Thin adapter: copy 'size' bytes from the virtual address 'address'
+ * into 'partner_vaddr' via the double-hummer path.  Returns 0 on
+ * success, 1 if the FPU path declined or faulted. */
+static unsigned int operate_vcopy(unsigned long address, void * partner_vaddr, unsigned long size)
+{
+ const void * copy_src = (const void *) address ;
+ unsigned int copy_rc ;
+ TRACEN(k_t_detail,"address=0x%08lx partner_vaddr=%p size=0x%08lx",address,partner_vaddr,size) ;
+ copy_rc = doublehummer_memcpy(partner_vaddr,copy_src,size) ;
+ return copy_rc ;
+}
+
+
+/* Probe that every user page in [address, address+size] is readable,
+ * by get_user()ing one int from each page (which also faults the page
+ * in).  Returns 0 if all pages are usable, 1 to reject the copy.
+ * Kernel addresses are accepted without probing.  Multi-page spans are
+ * rejected outright unless the fpu_handle_pagecrossing_read sysctl is
+ * set (crossing page boundaries has shown kernel traps -- see TODO). */
+static int all_pages_mapped_read(unsigned long address, unsigned long size)
+{
+ unsigned int start_page=(address >> k_page_shift) ;
+ unsigned int end_page=((address+size) >> k_page_shift) ;
+ unsigned int page_count = end_page-start_page+1 ;
+ unsigned int x ;
+ if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK
+ if( k_inhibit_crosspage_read && page_count > 1 && 0 == bgp_memcpy_control.fpu_handle_pagecrossing_read)
+ {
+ /* TODO: Should be able to handle page-crossings, but have seen kernel traps related to this */
+ dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ;
+ return 1 ;
+ }
+ /* Defend against the possibility that the user application has posted an unmapped address */
+ for(x=0;x<page_count;x+=1)
+ {
+ int pageInt ;
+ int __user * pageIntP = (int __user *) ((start_page+x) << k_page_shift) ;
+ if( get_user(pageInt,pageIntP) )
+ {
+ TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ;
+ if( k_diag_not_mapped)
+ {
+ tlb_t t ;
+ unsigned int r=v_to_r_maybe((void *)address, &t) ;
+ TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ;
+ TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+ diagnose_tlb(&t) ;
+ }
+
+ return 1;
+ }
+
+ }
+ return 0 ;
+}
+/* Probe that every user page in [address, address+size] is writable,
+ * by put_user()ing a zero byte into each page (note this is a
+ * destructive probe: the byte is expected to be overwritten by the
+ * copy that follows).  Returns 0 if all pages are usable, 1 to reject.
+ * Kernel addresses are accepted without probing; multi-page spans are
+ * rejected unless the fpu_handle_pagecrossing_write sysctl is set.
+ *
+ * Fix vs. original: the trace messages for the first-page probe used
+ * the loop variable 'x' before it was initialized (undefined
+ * behavior, garbage in the trace); the first page is page 0, so
+ * start_page is traced directly.
+ */
+static int all_pages_mapped_write(unsigned long address, unsigned long size)
+{
+ unsigned int start_page=(address >> k_page_shift) ;
+ unsigned int end_page=((address+size) >> k_page_shift) ;
+ unsigned int page_count = end_page-start_page+1 ;
+ unsigned int x ;
+ char __user * pageCharP = (char __user *) address ;
+ if( is_kernel_addr(address)) return 0 ; // If we have a 'kernel address', assume it's OK
+ if( k_inhibit_crosspage_write && page_count > 1 && 0 == bgp_memcpy_control.fpu_handle_pagecrossing_write )
+ {
+ /* TODO: Should be able to handle page-crossings, but have seen kernel traps related to this */
+ dma_memcpy_statistic(k_copy_crosspage_limitation_rejects) ;
+ return 1 ;
+ }
+ if(put_user(0,pageCharP))
+ {
+ TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",(start_page << k_page_shift),start_page,page_count) ;
+ if( k_diag_not_mapped)
+ {
+ tlb_t t ;
+ unsigned int r=v_to_r_maybe((void *)address, &t) ;
+ TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",(start_page << k_page_shift),start_page,page_count) ;
+ TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+ diagnose_tlb(&t) ;
+ }
+
+ return 1;
+ }
+ /* Defend against the possibility that the user application has posted an unmapped address */
+ for(x=1;x<page_count;x+=1)
+ {
+ char __user * pageCharP = (char __user *) ((start_page+x) << k_page_shift) ;
+ if( put_user(0,pageCharP) )
+ {
+ TRACEN(k_t_general,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ;
+ if( k_diag_not_mapped)
+ {
+ tlb_t t ;
+ unsigned int r=v_to_r_maybe((void *)address, &t) ;
+ TRACEN(k_t_request,"Unmapped : 0x%08x start_page=0x%08x page_count=0x%08x",((start_page+x) << k_page_shift),start_page,page_count) ;
+ TRACEN(k_t_request,"address=0x%08lx r=0x%08x",address,r) ;
+ diagnose_tlb(&t) ;
+ }
+
+ return 1;
+ }
+
+ }
+ return 0 ;
+}
+
+/* Pre-flight an FPU-assisted copy: optionally probe that the source
+ * pages are readable and the destination pages writable (gated by the
+ * compile-time k_map_read_check / k_map_write_check switches), then
+ * run the copy.  Returns 0 on success, 1 on rejection or fault. */
+static int instrument_copy_user_address_by_touch(unsigned long address, unsigned long size,void * partner_vaddr)
+{
+ int source_unusable = k_map_read_check && all_pages_mapped_read(address,size) ;
+ if( source_unusable )
+ {
+ dma_memcpy_statistic(k_copy_source_rejects) ;
+ return 1 ;
+ }
+ else
+ {
+ int target_unusable = k_map_write_check && all_pages_mapped_write((unsigned int) partner_vaddr,size) ;
+ if( target_unusable )
+ {
+ dma_memcpy_statistic(k_copy_target_rejects) ;
+ return 1 ;
+ }
+ }
+ /* Both ends look usable; run the transfer with the FPU */
+ return operate_vcopy(address,partner_vaddr,size) ;
+}
+
+/* Entry from bgp_fpu_instrument_copy_tofrom_user: adapt (to,from,size)
+ * to the touch-checked FPU copy.  Returns 0 if copied, 1 if not. */
+static int instrument_copy_tofrom_user(unsigned long to, unsigned long from, unsigned long size)
+{
+ int result ;
+ TRACEN(k_t_fpucopy,"(>)") ;
+ result = instrument_copy_user_address_by_touch(from,size,(void *)to) ;
+ TRACEN(k_t_fpucopy,"(<) rc=%d",result) ;
+ return result ;
+}
+
+/* Compile-time gate for the DMA-engine copy path (runtime sysctl
+ * use_dma must also be set for it to run) */
+enum {
+ k_enable_dma_memcpy = 1  // TODO: Get DMA memcopy working, and enable it here
+};
+/* Accelerated copy_tofrom_user front end.  Returns 0 when the data was
+ * copied here (DMA or FPU path), nonzero when the caller must fall
+ * back to the ordinary copy routine.  (NOTE(review): the original
+ * header comment stated the opposite return sense; the statistics
+ * accounting below treats rc==0 as an accelerate success.) */
+extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void *to,
+		const void __user *from, unsigned long size)
+{
+	/* Optionally pre-mark the target so verification can tell whether it was written */
+	if( k_premark && bgp_memcpy_control.verify_dma) memset(to,0x11,size) ;
+#if defined(CONFIG_BLUEGENE_DMA_MEMCPY)
+	if( k_enable_dma_memcpy && bgp_memcpy_control.use_dma)
+	{
+		unsigned long dma_rc ;
+		if( bgp_memcpy_control.mask)
+		{
+			/* Run the DMA copy with interrupts masked */
+			unsigned long flags ;
+			local_irq_save(flags) ;
+			dma_rc = bgp_dma_instrument_copy_tofrom_user(to, from, size) ;
+			local_irq_restore(flags) ;
+		}
+		else
+		{
+			dma_rc = bgp_dma_instrument_copy_tofrom_user(to, from, size) ;
+		}
+		return dma_rc ;
+	}
+	else
+#endif
+	{
+		dma_memcpy_statistic(k_copy_tofrom_user_calls) ;
+		if( size > 0 && bgp_memcpy_control.use_fpu && size >= bgp_memcpy_control.fpu_threshold )
+		{
+			unsigned long fpu_rc ;
+			TRACEN(k_t_fpucopy,"to=%p from=%p size=0x%08lx",to,from,size) ;
+			fpu_rc = instrument_copy_tofrom_user((unsigned long)to,(unsigned long)from,size) ;
+			dma_memcpy_statistic((0==fpu_rc) ? k_copy_accelerate_successes : k_copy_accelerate_rejects) ;
+			return fpu_rc ;
+		}
+		dma_memcpy_statistic(k_copy_size_rejects) ;
+		return 1 ; // Not copied, size under threshold
+	}
+}
+
+#if defined(CONFIG_WRAP_COPY_TOFROM_USER)
+/* Page copy override: when the assist_active sysctl is set, copy one
+ * page through the FPU quad pipeline (interrupts masked -- the copy
+ * has shown occasional errors when run with interrupts enabled, see
+ * the comment in doublehummer_memcpy); otherwise plain memcpy(). */
+void copy_page(void *to, void *from)
+{
+ TRACEN(k_t_fpucopy,"to=%p from=%p",to,from) ;
+ if(bgp_memcpy_control.assist_active )
+ {
+ unsigned long flags ;
+ unsigned int miniblock_count = k_page_size / sizeof(miniblock) ;
+ enable_kernel_fp() ;
+
+ local_irq_save(flags) ;
+ doublehummer_copy_unroll((void *)to,(void *)from,miniblock_count-1) ;
+ local_irq_restore(flags) ;
+ }
+ else
+ {
+ memcpy(to,from,k_page_size) ;
+ }
+
+}
+
+/* Zero constant fed (twice) to doublehummer_store_quads below */
+static const double v=0.0 ;
+/* Page clear override: when the assist_active sysctl is set, zero
+ * 2^order pages by storing zero quads through the FPU (interrupts
+ * masked, as for copy_page); otherwise plain memset(). */
+void clear_pages(void *p, int order)
+{
+ TRACEN(k_t_fpucopy,"p=%p order=%d",p,order) ;
+ if(bgp_memcpy_control.assist_active )
+ {
+ unsigned int quadcount=(k_page_size/16) << order ;
+ unsigned long flags ;
+ enable_kernel_fp() ;
+/* double v=0.0 ; */
+ local_irq_save(flags) ;
+ doublehummer_store_quads(p,quadcount,&v,&v) ;
+ local_irq_restore(flags) ;
+
+
+ }
+ else
+ {
+ memset(p,0,k_page_size << order) ;
+ }
+
+
+}
+#endif
+
+/* Register the tunables in bgp_memcpy_table under /proc/sys/bgp/copy/ */
+void __init
+bgp_fpu_register_memcpy_sysctl(void)
+{
+  register_sysctl_paths(memcpy_ctl_path,bgp_memcpy_table) ;
+  TRACEN(k_t_init, "memcpy sysctl registered") ;
+
+}
diff --git a/drivers/net/bgp_torus/bgp_memcpy.h b/drivers/net/bgp_torus/bgp_memcpy.h
new file mode 100644
index 00000000000000..b4aa80f32a30de
--- /dev/null
+++ b/drivers/net/bgp_torus/bgp_memcpy.h
@@ -0,0 +1,204 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Author: Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: Blue Gene low-level driver copy_tofrom_user using
+ * BlueGene-specific hardware
+ *
+ *
+ ********************************************************************/
+#ifndef __BGP_MEMCPY_H__
+#define __BGP_MEMCPY_H__
+
+
+/* Runtime tunables for the accelerated copy paths; one instance
+ * (bgp_memcpy_control) is exposed field-by-field through sysctl. */
+typedef struct
+{
+  int use_dma ;  /* Enable the BGP DMA-engine copy path */
+  int use_fpu ;  /* Enable the double-hummer FPU copy path */
+  int dma_threshold ;  /* Use the BGP DMA unit for copy_tofrom_user this size or larger */
+  int fpu_threshold ;  /* Use the BGP FPU for copy_tofrom_user this size or larger */
+  int verify_dma ;  /* Whether to verify the copy (for diagnostics) */
+  int verify_fpu ;  /* Whether to verify the copy (for diagnostics) */
+  int cycles_per_packet ;  /* Estimate of number of cycles per packet, for local spin before looking at counters */
+  int faults_until_disable ;  /* Number of faults until we disable acceleration */
+  int rate_observe_report_count ;  /* Number of times out of 256 that the rate gets displayed */
+  int handle_pagecrossing ;  /* Whether the DMA version should attempt to handle page-boundary-crossings */
+  int fpu_handle_pagecrossing_read ;  /* Whether the FPU version should attempt to handle page-boundary-crossings on reads */
+  int fpu_handle_pagecrossing_write ;  /* Whether the FPU version should attempt to handle page-boundary-crossings on writes */
+  int mask ;  /* Whether to mask interrupts */
+  int assist_active ;  /* Whether to assist copypage and clearpages */
+  /* int trace_count ; */  /* Number of trace records to cut before stopping */
+} bgp_memcpy_control_t ;
+
+extern bgp_memcpy_control_t bgp_memcpy_control ;
+
+/* Indices into the bgp_dma_memcpy_statistics counter array; the final
+ * member k_copy_statistics is the array length.  Counters record why
+ * each copy_tofrom_user call was accelerated or rejected. */
+enum {
+ k_copy_tofrom_user_calls ,
+ k_copy_cause_fallback ,
+ k_copy_accelerate_successes ,
+ k_copy_accelerate_rejects ,
+
+ k_copy_size_rejects ,
+ k_copy_spanpage_rejects ,
+ k_copy_crosspage_limitation_rejects ,
+ k_copy_inconsistent_tlb_1_rejects ,
+
+ k_copy_inconsistent_tlb_2_rejects ,
+ k_copy_no_counter_rejects ,
+ k_copy_source_tlb_rejects ,
+ k_copy_target_tlb_rejects ,
+
+ k_copy_source_rejects ,
+ k_copy_target_rejects ,
+ k_copy_unaligned_rejects ,
+ k_copy_verify_attempts ,
+
+ k_copy_verify_miscompares ,
+ k_copy_tlb_touches ,
+ k_copy_await_idle_zero ,
+ k_copy_await_idle_low ,
+
+ k_copy_await_idle_high ,
+ k_copy_inconsistent_tlb_1_info ,
+ k_copy_inconsistent_tlb_2_info ,
+ k_copy_segv_trap ,
+
+ k_precopy_segv_trap ,
+ k_postcopy_segv_trap ,
+
+ k_copy_statistics /* count -- keep last */
+};
+
+/* The underlying assembler copy function, returns 0 iff it copies all the data */
+extern unsigned long __real__copy_tofrom_user(void *to,
+ const void __user *from, unsigned long size) ;
+
+extern unsigned int bgp_dma_memcpy_statistics[k_copy_statistics] ;
+/* Count one occurrence of event X (a k_copy_* index); the counters
+ * are readable through the "statistics" sysctl. */
+static inline void dma_memcpy_statistic(unsigned int X)
+{
+	bgp_dma_memcpy_statistics[X]++ ;
+}
+
+extern unsigned long bgp_dma_instrument_copy_tofrom_user(void *to,
+ const void *from, unsigned long size) ;
+extern unsigned long bgp_fpu_instrument_copy_tofrom_user(void *to,
+ const void *from, unsigned long size) ;
+
+/* Whether v_to_r_maybe() records a TLB snapshot into its tlb_t out-param */
+enum
+{
+  k_diagnose = 1
+};
+/* Items to record about a copy op, for diagnosing faults */
+/* Snapshot of one TLB entry as returned by the search/get_tlb helpers */
+typedef struct
+{
+  const void * vaddr ;  /* virtual address that was looked up */
+  unsigned int tlb_v ;  /* raw result of the second search_tlb_v call */
+  unsigned int pageid ;  /* TLB word 0 (EPN, valid, size) */
+  unsigned int xlat ;  /* TLB word 1 (RPN, ERPN) */
+  unsigned int attrib ;  /* TLB word 2 (WIMG, U bits, permissions) */
+} tlb_t ;
+
+/* Full record of one copy operation, dumped by diagnose_faulty_copy() */
+typedef struct
+{
+  void * to_vaddr ;
+  const void * from_vaddr ;
+  unsigned int size ;
+  tlb_t a_tlb ;
+  tlb_t b_tlb ;
+  unsigned int a_raddress ;
+  unsigned int b_raddress ;
+  unsigned int from_check_pre ;
+  unsigned int to_check_pre ;
+  unsigned int from_check_post ;
+  unsigned int to_check_post ;
+  unsigned int frag_index ;
+} copy_op_t ;
+
+/* Decode and trace one tlb_t snapshot: translation-space bit, TID,
+ * effective/real page numbers, page size, WIMG storage attributes,
+ * U0-U3 user bits, valid bit and user/supervisor XWR permissions.
+ * Field extraction relies on the TLB0_*/TLB1_*/TLB2_* macros from
+ * 450_tlb.h (PPC44x MMU TLB word layout). */
+static void diagnose_tlb(tlb_t *t)
+{
+  unsigned int t0=t->pageid ;
+  unsigned int t1=t->xlat ;
+  unsigned int t2=t->attrib ;
+  TRACEN(k_t_request,"vaddr=%p tlb_v=0x%08x %08x-%08x-%08x ts=%d tid=0x%02x epn=0x%08x rpn=0x%01x-%08x size=%s WIMG=%d%d%d%d U=%d%d%d%d V=%d uxwr=%d sxwr=%d",
+      t->vaddr,t->tlb_v,t0,t1,t2,
+      (t0 & TLB0_TS) ? 1 : 0,
+      (t2 >> 22) & 0xff ,
+      TLB0_EPN_1K(t0),
+      TLB1_ERPN(t1),TLB1_RPN_1K(t1),
+      TLB_SIZES[(t0 & 0xF0) >> 4],
+      (t2 & TLB2_W) ? 1 : 0,
+      (t2 & TLB2_I) ? 1 : 0,
+      (t2 & TLB2_M) ? 1 : 0,
+      (t2 & TLB2_G) ? 1 : 0,
+      (t2 & TLB2_U0) ? 1 : 0,
+      (t2 & TLB2_U1) ? 1 : 0,
+      (t2 & TLB2_U2) ? 1 : 0,
+      (t2 & TLB2_U3) ? 1 : 0,
+      (t0 & TLB0_V) ? 1 : 0,
+      (t2 >> 3) & 7,
+      t2 & 7
+      ) ;
+}
+static void diagnose_faulty_copy(copy_op_t *c) __attribute__((unused)) ;
+/*
+ * Dump one recorded copy operation and both of its TLB snapshots.
+ * Fix vs. original: the argument list passed from_check_post before
+ * to_check_pre, so those two fields were printed under each other's
+ * labels; the arguments now match the format string.
+ */
+static void diagnose_faulty_copy(copy_op_t *c)
+{
+  TRACEN(k_t_request,"from_vaddr=%p to_vaddr=%p size=0x%08x a_raddress=0x%08x b_raddress=0x%08x from_check_pre=0x%08x to_check_pre=0x%08x from_check_post=0x%08x to_check_post=0x%08x frag_index=%d",
+      c->from_vaddr,c->to_vaddr,c->size,c->a_raddress,c->b_raddress,c->from_check_pre,c->to_check_pre,c->from_check_post,c->to_check_post,c->frag_index) ;
+  diagnose_tlb(&c->a_tlb) ;
+  diagnose_tlb(&c->b_tlb) ;
+}
+
+/* Find the real store address for a virtual address, by looking at the TLB and causing a TLB miss if needed */
+/* Searches the TLB twice and only trusts the result if both searches
+ * return the same entry (guarding against the translation changing
+ * under us, e.g. from an interrupt), the entry is valid, and the
+ * search reported a hit (bit 0x20000000 -- NOTE(review): semantics
+ * taken from the search_tlb_v helper in 450_tlb.h, confirm there).
+ * When k_diagnose is set the raw TLB words are also recorded into *t
+ * for later diagnose_tlb() dumps.  Returns the real address, or
+ * (unsigned int)-1 if the address is not mapped. */
+static unsigned int v_to_r_maybe(const void * vaddr,tlb_t *t)
+{
+  unsigned int vaddr_int=(unsigned int)vaddr ;
+  int tlbx=search_tlb_v(vaddr_int) ;
+  int pageid=get_tlb_pageid(tlbx) ;
+  int xlat=get_tlb_xlat(tlbx) ;
+  int attrib=get_tlb_attrib(tlbx) ;
+  int tlbx1=search_tlb_v((unsigned int)vaddr) ;
+  if( k_diagnose)
+  {
+      t->vaddr = vaddr ;
+      t->tlb_v = tlbx1 ;
+      t->pageid = pageid ;
+      t->xlat = xlat ;
+      t->attrib = attrib ;
+  }
+  if(    (tlbx == tlbx1)  /* Translation didn't change under me due to e.g. interrupt */
+      && ((pageid & TLB0_V) != 0)  /* TLB is valid */
+      && ((tlbx & 0x20000000) != 0)  /* search_tlb_v sets this bit if it found a translation */
+      )
+  {
+      unsigned int epn = TLB0_EPN_1K(pageid) ;  // virtual page for the TLB
+      unsigned int rpn = TLB1_RPN_1K(xlat) ;  // real page for the TLB
+      unsigned int result = (vaddr_int-epn) + rpn ;
+      TRACEN(k_t_dmacopy,"vaddr=%p tlbx=0x%08x pageid=0x%08x xlat=0x%08x attrib=0x%08x epn=0x%08x rpn=0x%08x result=0x%08x",
+          vaddr,tlbx,pageid,xlat,attrib,epn,rpn,result) ;
+      return result ;
+
+  }
+  else
+  {
+      TRACEN(k_t_dmacopy,"vaddr=%p tlbx=0x%08x pageid=0x%08x tlbx1=0x%08x unmapped",
+          vaddr,tlbx,pageid,tlbx1) ;
+      return (unsigned int) -1 ;  // Not mapped
+  }
+}
+
+#endif
diff --git a/drivers/net/bgp_torus/bgtor.h b/drivers/net/bgp_torus/bgtor.h
new file mode 100644
index 00000000000000..49bceff315efda
--- /dev/null
+++ b/drivers/net/bgp_torus/bgtor.h
@@ -0,0 +1,310 @@
+/*********************************************************************
+ *
+ * Description: Torus definitions
+ *
+ * Copyright (c) 2007, 2008 International Business Machines
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ ********************************************************************/
+#ifndef __DRIVERS__BLUEGENE__TORUS_H__
+#define __DRIVERS__BLUEGENE__TORUS_H__
+
+/* #include "bglink.h" */
+#include <linux/ioctl.h>
+
+#define TORUS_MAX_MTU (39 * 240)
+
+#define BGP_TORUS_MAX_IRQS 96
+
+#define BGP_TORUS_GROUPS 4
+#define BGP_TORUS_DMA_SIZE (sizeof(struct torus_dma) * BGP_TORUS_GROUPS)
+
+#define BGP_TORUS_INJ_FIFOS 32
+#define BGP_TORUS_RCV_FIFOS 8
+#define BGP_TORUS_COUNTERS 64
+#define BGP_TORUS_DMA_REGIONS 8
+
+#define BGP_TORUS_TX_ENTRIES 256
+#define BGP_TORUS_RX_ENTRIES 512
+
+#define BGP_TORUS_USER_GROUP 1
+
+/* IOCTLs for UL DMA */
+#define TORUS_IOCTL 'T'
+#define TORUS_ALLOC_TX_COUNTER _IO(TORUS_IOCTL, 1)
+#define TORUS_ALLOC_RX_COUNTER _IO(TORUS_IOCTL, 2)
+#define TORUS_ALLOC_TX_FIFO _IO(TORUS_IOCTL, 3)
+#define TORUS_ALLOC_RX_FIFO _IO(TORUS_IOCTL, 4)
+#define TORUS_FREE_TX_COUNTER _IO(TORUS_IOCTL, 5)
+#define TORUS_FREE_RX_COUNTER _IO(TORUS_IOCTL, 6)
+#define TORUS_FREE_TX_FIFO _IO(TORUS_IOCTL, 7)
+#define TORUS_FREE_RX_FIFO _IO(TORUS_IOCTL, 8)
+#define TORUS_REGISTER_TX_MEM _IO(TORUS_IOCTL, 9)
+#define TORUS_REGISTER_RX_MEM _IO(TORUS_IOCTL, 10)
+#define TORUS_DMA_RANGECHECK _IO(TORUS_IOCTL, 11)
+
+
+struct torus_fifo {
+ u32 start;
+ u32 end;
+ volatile u32 head;
+ volatile u32 tail;
+};
+
+struct torus_dma { /* memory-mapped DMA register block, one per group; comments give byte offsets within the block */
+ struct {
+ struct torus_fifo fifo[BGP_TORUS_INJ_FIFOS]; /* 0 - 1ff */
+ u32 empty; /* 200 */
+ u32 __unused0; /* 204 */
+ u32 avail; /* 208 */
+ u32 __unused1; /* 20c */
+ u32 threshold; /* 210 */
+ u32 __unused2; /* 214 */
+ u32 clear_threshold; /* 218 */
+ u32 __unused3; /* 21c */
+ u32 dma_active; /* 220 */
+ u32 dma_activate; /* 224 */
+ u32 dma_deactivate; /* 228 */
+ u8 __unused4[0x100-0x2c]; /* 22c - 2ff */
+
+ u32 counter_enabled[2]; /* 300 */
+ u32 counter_enable[2]; /* 308 */
+ u32 counter_disable[2]; /* 310 */
+ u32 __unused5[2]; /* 318 */
+ u32 counter_hit_zero[2]; /* 320 */
+ u32 counter_clear_hit_zero[2]; /* 328 */
+ u32 counter_group_status; /* 330 */
+ u8 __unused6[0x400-0x334]; /* 334 - 3ff */
+
+ struct {
+  u32 counter;
+  u32 increment;
+  u32 base;
+  u32 __unused;
+ } counter[BGP_TORUS_COUNTERS]; /* 400 - 7ff */
+ } __attribute__((packed)) inj;
+
+ struct {
+ struct torus_fifo fifo[BGP_TORUS_RCV_FIFOS]; /* 800 - 87f */
+ struct torus_fifo hdrfifo; /* 880 - 88f */
+ u8 __unused0[0x900-0x890]; /* 890 - 8ff */
+
+ u32 glob_ints[16]; /* 900 - 93f */
+ u8 __unused1[0xa00-0x940]; /* 940 - 9ff */
+
+ u32 empty[2]; /* a00 */
+ u32 available[2]; /* a08 */
+ u32 threshold[2]; /* a10 */
+ u32 clear_threshold[2]; /* a18 */
+ u8 __unused2[0xb00 - 0xa20]; /* a20 - aff */
+
+ u32 counter_enabled[2]; /* b00 */
+ u32 counter_enable[2]; /* b08 */
+ u32 counter_disable[2]; /* b10 */
+ u32 __unused3[2]; /* b18 */
+ u32 counter_hit_zero[2]; /* b20 */
+ u32 counter_clear_hit_zero[2]; /* b28 */
+ u32 counter_group_status; /* b30 */
+ u8 __unused4[0xc00 - 0xb34]; /* b34 - bff */
+
+ struct {
+  u32 counter;
+  u32 increment;
+  u32 base;
+  u32 limit;
+ } counter[BGP_TORUS_COUNTERS]; /* c00 - fff */
+ } __attribute__((packed)) rcv;
+};
+
+enum {
+ torus_dir_xplus = 0x20,
+ torus_dir_xminus = 0x10,
+ torus_dir_yplus = 0x08,
+ torus_dir_yminus = 0x04,
+ torus_dir_zplus = 0x02,
+ torus_dir_zminus = 0x01
+};
+
+union torus_fifo_hw_header {
+ struct {
+ u32 csum_skip : 7; /* number of shorts to skip in chksum */
+ u32 sk : 1; /* 0= use csum_skip, 1 skip pkt */
+ u32 dirhint : 6; /* x-,x+,y-,y+,z-,z+ */
+ u32 deposit : 1; /* multicast deposit */
+ u32 pid0 : 1; /* destination fifo group MSb */
+ u32 size : 3; /* size: (size + 1) * 32bytes */
+ u32 pid1 : 1; /* destination fifo group LSb */
+ u32 dma : 1; /* 1=DMA mode, 0=Fifo mode */
+ u32 dyn_routing : 1; /* 1=dynamic routing, */
+ /* 0=deterministic routing */
+ u32 virt_channel : 2; /* channel (0=Dynamic CH0, */
+ /* 1=Dynamic CH1, 2=Bubble, 3=Prio) */
+ u32 dest_x : 8;
+ u32 dest_y : 8;
+ u32 dest_z : 8;
+ u32 reserved : 16;
+ };
+ u8 raw8[8];
+ u32 raw32[2];
+} __attribute__((packed));
+
+union torus_dma_hw_header {
+ struct {
+ u32 : 30;
+ u32 prefetch : 1;
+ u32 local_copy : 1;
+ u32 : 24;
+ u32 counter : 8;
+ u32 base;
+ u32 length;
+ };
+ u32 raw32[2];
+} __attribute__((packed));
+
+union torus_dma_sw_header {
+ struct {
+ u32 offset;
+ u8 counter_id;
+ u8 bytes;
+ u8 unused : 6;
+ u8 pacing : 1;
+ u8 remote_get : 1;
+ };
+ u32 raw32[2];
+} __attribute__((packed));
+
+union torus_inj_desc {
+ u32 raw32[8];
+ struct {
+ union torus_dma_hw_header dma_hw;
+ union torus_fifo_hw_header fifo;
+ union torus_dma_sw_header dma_sw;
+ };
+} __attribute__((packed));
+
+struct torus_tx_ring {
+ union torus_inj_desc *desc;
+ struct sk_buff **skbs;
+ u32 start;
+ unsigned int tail_idx, pending_idx;
+ unsigned counter;
+ phys_addr_t paddr;
+ spinlock_t lock;
+};
+
+union torus_source_id {
+ u32 raw;
+ atomic_t raw_atomic;
+ struct {
+ u32 conn_id : 8;
+ u32 src_key : 24;
+ };
+};
+
+#define TORUS_SOURCE_ID_NULL (~0ul) /* anything that can't be a legitimate id */
+
+union torus_rcv_desc {
+ u32 raw32[256 / sizeof(u32)];
+ u8 raw8[256];
+ struct {
+ union torus_fifo_hw_header fifo;
+ u32 counter;
+ union torus_source_id src_id;
+ u32 data[];
+ };
+} __attribute__((packed));
+
+struct torus_skb_cb {
+ union torus_source_id src_id;
+ u32 received_len;
+ u32 total_len;
+};
+
+struct torus_rx_ring {
+ union torus_rcv_desc *desc;
+ struct sk_buff_head skb_list;
+ u32 start;
+ unsigned int head_idx;
+ phys_addr_t paddr;
+ spinlock_t lock;
+
+ /* bookkeeping for packet currently being reconstructed */
+ union torus_source_id src_id;
+ u32 received_len;
+ u32 total_len;
+ struct sk_buff *skb;
+
+ /* statistics */
+ u32 dropped;
+ u32 delivered;
+};
+
+struct bg_torus {
+ u8 coordinates[3];
+ u8 dimension[3];
+ union torus_source_id source_id;
+
+ spinlock_t lock;
+ struct torus_dma *dma;
+
+ struct torus_tx_ring tx[BGP_TORUS_INJ_FIFOS * BGP_TORUS_GROUPS];
+ struct torus_rx_ring rx[BGP_TORUS_RCV_FIFOS * BGP_TORUS_GROUPS];
+
+ /* mapping from counter to tx ring index */
+ int inj_counter_to_txidx[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS];
+
+ /* counters used */
+ unsigned long inj_counter_map[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS /
+ sizeof(unsigned long) / 8];
+ unsigned long rcv_counter_map[BGP_TORUS_COUNTERS * BGP_TORUS_GROUPS /
+ sizeof(unsigned long) / 8];
+
+ /* fifos used */
+ unsigned long inj_fifo_map[BGP_TORUS_INJ_FIFOS * BGP_TORUS_GROUPS /
+ sizeof(unsigned long) / 8 + 1];
+ unsigned long rcv_fifo_map[BGP_TORUS_RCV_FIFOS * BGP_TORUS_GROUPS /
+ sizeof(unsigned long) / 8 + 1];
+
+ /* dma regions used */
+ unsigned long inj_dma_region_map;
+ unsigned long rcv_dma_region_map;
+
+ unsigned int dcr_base, dcr_size;
+ struct resource pdma, pfifo0, pfifo1;
+ int virq[BGP_TORUS_MAX_IRQS];
+
+ struct of_device *ofdev;
+ struct ctl_table_header *sysctl_header;
+};
+
+
+static inline void bgtorus_init_inj_desc(struct bg_torus *torus,
+     union torus_inj_desc *desc,
+     int len, u8 x, u8 y, u8 z)
+{
+ memset(desc, 0, sizeof(*desc));
+ /* 'static inline' (not 'extern inline'): safe in a header under both gnu89 and C99 inline rules */
+ desc->fifo.sk = 1; /* skip checksum */
+ desc->fifo.size = 7; /* always full 240 bytes packets */
+ desc->fifo.dyn_routing = 1;
+ desc->fifo.dest_x = x;
+ desc->fifo.dest_y = y;
+ desc->fifo.dest_z = z;
+
+ desc->dma_hw.length = len;
+
+ /* atomic { desc->dma_sw.raw32[1] = ++torus->source_id.conn_id; } */
+ desc->dma_sw.raw32[1] =
+  atomic_add_return(1U << 24, &torus->source_id.raw_atomic);
+}
+
+int bgtorus_xmit(struct bg_torus *torus, union torus_inj_desc *desc,
+ struct sk_buff *skb);
+
+
+#endif /* !__DRIVERS__BLUEGENE__TORUS_H__ */
diff --git a/drivers/net/bgp_torus/bgtornic.c b/drivers/net/bgp_torus/bgtornic.c
new file mode 100644
index 00000000000000..da3a9b2e871d79
--- /dev/null
+++ b/drivers/net/bgp_torus/bgtornic.c
@@ -0,0 +1,597 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * Description: Blue Gene driver exposing tree and torus as a NIC
+ *
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+
+#include <net/arp.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/bgp_personality.h>
+#include <asm/bluegene.h>
+#include <linux/KernelFxLog.h>
+
+
+#include "bgtornic.h"
+
+int col_start_xmit(struct sk_buff *skb, struct net_device *dev);
+
+/* #define TRUST_TORUS_CRC */
+
+#if !defined(CONFIG_BLUEGENE_TCP_WITHOUT_NAPI)
+/* Select operation with linux 'dev->poll' */
+#define TORNIC_DEV_POLL
+#endif
+
+/* #define TORNIC_TASKLET_BGNET */
+
+/* #define TORNIC_TRANSMIT_TREE_TASKLET */
+
+#include "../bgp_network/bgp_net_traceflags.h"
+
+#define ENABLE_TRACE
+
+/* #define REQUIRE_TRACE */
+
+#if defined(ENABLE_TRACE)
+extern int bgp_dma_tcp_tracemask ;
+/* extern int bgtorus_debug_tracemask ; */
+#define bgtornic_debug_tracemask bgp_dma_tcp_tracemask
+/* static int bgtornic_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+#endif
+
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...) KernelFxLog(1,x)
+#define TRACE1(x...) KernelFxLog(1,x)
+#define TRACE2(x...) KernelFxLog(1,x)
+#define TRACEN(i,x...) KernelFxLog(1,x)
+#define TRACED(x...) KernelFxLog(1,x)
+#define TRACES(x...) KernelFxLog(1,x)
+#elif defined(ENABLE_TRACE)
+#define TRACE(x...) KernelFxLog(bgtornic_debug_tracemask & k_t_general,x)
+#define TRACE1(x...) KernelFxLog(bgtornic_debug_tracemask & k_t_lowvol,x)
+#define TRACE2(x...) KernelFxLog(bgtornic_debug_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgtornic_debug_tracemask & (i),x)
+#define TRACED(x...) KernelFxLog(1,x)
+#define TRACES(x...) KernelFxLog(1,x)
+#else
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#endif
+
+/* #define TORNIC_FORCE_BROADCAST 1 */
+/**********************************************************************
+ * defines
+ **********************************************************************/
+
+static const char version[] = "Bgtornet: Version 1.0, (c) 2008,2010 IBM Corporation, GPL";
+
+/**********************************************************************
+ * Linux module
+ **********************************************************************/
+
+MODULE_DESCRIPTION("BlueGene Torus Ethernet driver");
+MODULE_LICENSE("GPL");
+
+
+int bgtornic_driverparm = 0 ;
+int bgnet_receive_torus(struct sk_buff * skb) ;
+void dma_tcp_poll_handler(void) ;
+void dma_tcp_rx_enable(void) ;
+
+/* Diagnostic options */
+enum {
+ k_inhibit_scattergather = 0 , /* Whether to tell linux we cannot do 'scattergather' DMA TODO: test whether scattergathers actually work, using (e.g.) NFS */
+ k_inhibit_gso = 1 /* Whether to tell linux not to try Generic Segmentation Offload ; not useful until I can get s-g working with multiple frags in a skb */
+};
+
+
+static void dumpmem(const void *address, unsigned int length, const char * label) __attribute__((unused)) ; /* trace a hex dump of memory, 32 bytes per line, capped at 640 bytes */
+static void dumpmem(const void *address, unsigned int length, const char * label)
+ {
+ int x ; /* NOTE(review): signed index compared with unsigned 'length' below — fine for the capped range */
+ TRACEN(k_t_fifocontents|k_t_scattergather,"Memory dump, length=0x%08x: %s",length,label) ;
+ if( length > 20*32 ) { /* cap the dump at 20 lines of 32 bytes */
+  length = 20*32 ;
+ }
+ for (x=0;x<length;x+=32)
+ {
+  int *v = (int *)(address+x) ; /* void-pointer arithmetic: GNU extension, standard in kernel code */
+  TRACEN(k_t_fifocontents|k_t_scattergather,"%p: %08x %08x %08x %08x %08x %08x %08x %08x",
+  v,v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]
+  ) ;
+ }
+ }
+
+
+
+static BGP_Personality_t personality;
+static struct net_device *static_dev ;
+
+
+/* int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev) ; */
+int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev
+/* ,unsigned int x, unsigned int y, unsigned int z */
+ ) ;
+
+/**********************************************************************
+ * Linux' packet and skb management
+ **********************************************************************/
+
+static int bgtornet_change_mtu(struct net_device *dev, int new_mtu) /* netdev MTU-change hook: accept 60..BGTORNET_MAX_MTU, else -EINVAL */
+{
+/* struct bgtornet_dev *bgtornet = netdev_priv(dev); */
+ if (new_mtu < 60 || new_mtu > BGTORNET_MAX_MTU ) /* 60 = minimum ethernet payload */
+  return -EINVAL;
+ dev->mtu = new_mtu;
+/* bgtree_set_mtu(bgtornet->tree, new_mtu) ; */
+ return 0;
+}
+
+
+/* Sum one 32-bit word from every 16 bytes of data; the low 16 bits of the sum form the frame verifier */
+static unsigned int asf_frame_verifier(const char * data, unsigned int length)
+{
+ const unsigned int * data_int = (unsigned int *) data ; /* NOTE(review): assumes 'data' is 4-byte aligned — confirm at call sites */
+ unsigned int result = 0 ;
+ unsigned int index ;
+ for(index=0; index<length/sizeof(unsigned int);index += 4) /* step 4 u32s = 16 bytes */
+ {
+  result += data_int[index] ;
+ }
+ return result & 0xffff ; /* 16-bit check value */
+}
+
+static int bgtornet_receive(struct sk_buff *skb, struct bglink_hdr *lnkhdr,
+ struct bglink_proto* proto)
+{
+ struct net_device *dev = (struct net_device*)proto->private;
+ struct bgtornet_dev *bgtornet = netdev_priv(dev);
+
+ TRACE("bgtornet rcvd pkt: data=%p, len=%d, head=%d, tail=%d, res len=%d",
+ skb->data, skb->len, lnkhdr->opt_eth.pad_head,
+ lnkhdr->opt_eth.pad_tail, skb->len - lnkhdr->opt_eth.pad_head - lnkhdr->opt_eth.pad_tail);
+
+
+ /* skb_pull and trim check for over/underruns. For 0 size the
+ * add/subtract is the same as a test */
+ __skb_pull(skb, lnkhdr->opt_eth.pad_head);
+ __skb_trim(skb, skb->len - lnkhdr->opt_eth.pad_tail);
+
+
+
+/* dumpmem(skb->data,skb->len,"Frame delivered via torus") ; */
+
+ skb->dev = dev;
+ skb->protocol = eth_type_trans(skb, dev);
+
+ TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ;
+ netif_rx(skb);
+
+
+ dev->last_rx = jiffies;
+ bgtornet->stats.rx_packets++;
+ bgtornet->stats.rx_bytes += skb->len;
+
+ return 0;
+}
+
+void bgtornet_rx_schedule(void)
+ {
+ TRACEN(k_t_general,"(>) bgtornet_rx_schedule") ;
+ {
+ struct net_device *dev = static_dev;
+ struct bgtornet_dev *bgtornet = netdev_priv(dev);
+ TRACEN(k_t_napi,"netif_rx_schedule(dev=%p,napi=%p)",dev,&bgtornet->napi) ;
+ napi_schedule(&bgtornet->napi) ;
+ }
+ TRACEN(k_t_general,"(<) bgtornet_rx_schedule") ;
+ }
+
+struct net_device_stats *bgtornet_stats(void)
+ {
+ struct net_device *dev = static_dev;
+ struct bgtornet_dev *bgtornet = netdev_priv(dev);
+ return &bgtornet->stats ;
+ }
+
+static int frame_passes_verification(struct sk_buff *skb) /* returns 1 if the frame's verifier matches (or is not TCP/IP), 0 on mismatch */
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ unsigned int eth_proto = eth->h_proto ; /* NOTE(review): no ntohs() — relies on big-endian PowerPC; confirm */
+ struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+ unsigned int iphlen = 4*iph->ihl ;
+ struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) );
+ unsigned int ip_proto = iph->protocol ;
+ if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP ) /* only TCP/IPv4 frames carry a verifier */
+ {
+  unsigned int tcphlen = 4*tcph->doff ;
+  char * payload = ((char *)(tcph)) + (tcphlen) ;
+  unsigned int payload_len=iph->tot_len-iphlen-tcphlen ; /* NOTE(review): tot_len used without ntohs — big-endian assumption */
+  unsigned int framecheck = asf_frame_verifier(payload,payload_len) ;
+  unsigned int rcvcheck = tcph->check ; /* sender stored the verifier in the TCP checksum field (see inject_verifier) */
+  TRACEN(k_t_general, "framecheck=0x%08x rcvcheck=0x%08x",
+  framecheck, rcvcheck
+  ) ;
+  if( framecheck != rcvcheck)
+  {
+   TRACEN(k_t_request,"(!!!) frame verify fails, framecheck=0x%08x rcvcheck=0x%08x payload_len=%d",
+   framecheck,
+   rcvcheck,
+   payload_len) ;
+   return 0 ;
+  }
+ }
+ return 1 ;
+}
+
+static inline void deliver_frame(struct sk_buff *skb)
+{
+ struct net_device *dev = static_dev;
+ struct bgtornet_dev *bgtornet = netdev_priv(dev);
+
+
+/* dumpmem(skb->data,skb->len,"Frame delivered via torus") ; */
+
+ skb->dev = dev;
+ skb->protocol = eth_type_trans(skb, dev);
+/* skb->pkt_type = PACKET_HOST ; */
+ if( k_trust_torus_crc) skb->ip_summed = CHECKSUM_PARTIAL ;
+
+#if defined(TORNIC_DEV_POLL)
+ TRACEN(k_t_napi,"netif_receive_skb(skb=%p)",skb) ;
+ netif_receive_skb(skb) ;
+#else
+ TRACEN(k_t_napi,"netif_rx(skb=%p)",skb) ;
+ netif_rx(skb);
+#endif
+
+ dev->last_rx = jiffies;
+ bgtornet->stats.rx_packets++;
+ bgtornet->stats.rx_bytes += skb->len;
+}
+
+int bgtornet_receive_torus(struct sk_buff *skb)
+{
+
+ TRACE("bgtornet rcvd pkt: data=%p, len=%d",
+ skb->data, skb->len);
+
+ if( k_asf_frame_verifier )
+ {
+ if (frame_passes_verification(skb))
+ {
+ deliver_frame(skb) ;
+ }
+ else
+ {
+ dev_kfree_skb(skb) ;
+ }
+ }
+ else
+ {
+ deliver_frame(skb) ;
+ }
+
+ TRACE("(<)");
+ return 0;
+}
+
+
+static void inject_verifier(struct sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ unsigned int eth_proto = eth->h_proto ;
+ struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+ unsigned int iphlen = 4*iph->ihl ;
+ struct tcphdr *tcph = (struct tcphdr *) ( ((char *)(iph)) + (iphlen) );
+ unsigned int ip_proto = iph->protocol ;
+ if( eth_proto == ETH_P_IP && ip_proto == IPPROTO_TCP )
+ {
+ unsigned int tcphlen = 4*tcph->doff ;
+ char * payload = ((char *)(tcph)) + (tcphlen) ;
+ unsigned int payload_len=iph->tot_len-iphlen-tcphlen ;
+ unsigned int framecheck = asf_frame_verifier(payload,payload_len) ;
+ tcph->check = framecheck ;
+ TRACEN(k_t_general,"framecheck set to 0x%08x",framecheck) ;
+ }
+
+}
+
+static int bgtornet_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+#if defined(CONFIG_BLUEGENE_TCP)
+ struct ethhdr *eth = (struct ethhdr *)skb->data;
+ struct iphdr *iph = (struct iphdr *)((skb->data)+sizeof(struct ethhdr)) ;
+ struct bgtornet_dev *bgtornet = netdev_priv(dev);
+ unsigned int h_proto = eth->h_proto ;
+ unsigned int daddr = iph->daddr ;
+ TRACEN(k_t_general,"(>) skb=%p skb->sk=%p h_dest[%02x:%02x:%02x:%02x:%02x:%02x] daddr=0x%08x", skb, skb->sk,
+ eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5], daddr) ;
+ if( eth->h_dest[0] == 0x00 && eth->h_dest[1] == 0x80 && eth->h_dest[2] == 0x47)
+ {
+
+ if( h_proto == ETH_P_IP && (daddr >> 24) == 12)
+ {
+ eth->h_dest[3]=(daddr >> 16) & 0xff ;
+ eth->h_dest[4]=(daddr >> 8) & 0xff ;
+ eth->h_dest[5]=(daddr& 0xff) - 1 ;
+ }
+
+ if( eth->h_dest[3] == personality.Network_Config.Xcoord
+ && eth->h_dest[4] == personality.Network_Config.Ycoord
+ && eth->h_dest[5] == personality.Network_Config.Zcoord
+ )
+ {
+ netif_rx(skb) ; /* Try to feed the skb to the local networking layer */
+ }
+ else
+ {
+ if( k_asf_frame_verifier ) inject_verifier(skb) ;
+ bgtorus_start_xmit(skb, dev
+/* , eth->h_dest[3],eth->h_dest[4],eth->h_dest[5] */
+ ) ;
+ }
+ bgtornet->stats.tx_packets += 1 ;
+ bgtornet->stats.tx_bytes += skb->len ;
+ }
+ else
+ {
+ /* Request to send a frame over the torus, but not to a torus MAC address. Trace and discard. */
+ TRACEN(k_t_protocol,"skb=%p skb->sk=%p h_dest[%02x:%02x:%02x:%02x:%02x:%02x] not torus-mac", skb, skb->sk,
+ eth->h_dest[0],eth->h_dest[1],eth->h_dest[2],eth->h_dest[3],eth->h_dest[4],eth->h_dest[5]) ;
+/* bgtornet->stats.tx_errors += 1; */
+/* bgtornet->stats.tx_aborted_errors += 1; */
+ dev_kfree_skb(skb) ;
+
+ }
+ TRACEN(k_t_general,"(<)") ;
+#else
+ col_start_xmit(skb, dev) ;
+#endif
+ return 0 ;
+}
+
+static int bgtornet_poll(struct napi_struct * napi, int budget) /* NAPI poll: drain the torus DMA, then re-enable rx interrupts */
+ {
+ struct net_device *dev = napi->dev ;
+ struct bgtornet_dev *bgtornet = netdev_priv(dev);
+ TRACEN(k_t_general,"(>) bgtornet_poll napi=%p dev=%p budget=%d", napi, dev, budget) ; /* NOTE(review): 'budget' is ignored — all pending work is drained in one call */
+ TRACEN(k_t_napi,"napi polling starts") ;
+ dma_tcp_poll_handler() ;
+ TRACEN(k_t_napi,"netif_rx_complete(dev=%p,napi=%p)",dev,&bgtornet->napi) ;
+ napi_complete(&bgtornet->napi);
+ dma_tcp_rx_enable() ; /* re-arm rx interrupts only after napi_complete */
+ TRACEN(k_t_general,"(<) bgtornet_poll dev=%p", dev) ;
+ return 0 ; /* 0 = no work left, per the NAPI contract */
+ }
+
+static void bgtornet_uninit(struct net_device *dev)
+{
+ struct bgtornet_dev *bgtornet = netdev_priv(dev);
+ BUG_ON(bgtornet->lnk.private != dev);
+
+}
+
+static struct net_device_stats *bgtornet_get_stats(struct net_device *dev)
+{
+ struct bgtornet_dev *bgtornet = netdev_priv(dev);
+ return &bgtornet->stats;
+}
+
+
+/* net_device init hook: wire this device into the link-protocol receive path */
+static int bgtornet_init (struct net_device *dev)
+{
+ struct bgtornet_dev *bgtornet = netdev_priv(dev); /* single fetch; redundant re-assignment removed */
+
+
+
+
+ /* register with tree */
+ bgtornet->lnk.lnk_proto = bgtornet->tor_protocol;
+ bgtornet->lnk.rcv = bgtornet_receive;
+ bgtornet->lnk.private = dev;
+
+
+
+ return 0;
+}
+
+void bgtornet_set_arp_table_entry(unsigned int x, unsigned int y, unsigned int z, unsigned int ip_address)
+ {
+ struct net_device *dev = static_dev ;
+ __be32 ip = ip_address ;
+ struct neighbour * neigh = neigh_create(&arp_tbl, &ip, dev);
+ if (neigh) {
+ u8 lladdr[6] ;
+ lladdr[0] = 0x00 ;
+ lladdr[1] = 0x80 ;
+ lladdr[2] = 0x47 ;
+ lladdr[3] = x ;
+ lladdr[4] = y ;
+ lladdr[5] = z ;
+ neigh_update(neigh, lladdr, NUD_PERMANENT, NEIGH_UPDATE_F_OVERRIDE);
+ neigh_release(neigh);
+ }
+ }
+
+#if defined(HAVE_NET_DEVICE_OPS)
+static const struct net_device_ops netdev_ops = {
+ .ndo_change_mtu = bgtornet_change_mtu ,
+ .ndo_get_stats = bgtornet_get_stats ,
+ .ndo_start_xmit = bgtornet_start_xmit ,
+ .ndo_init = bgtornet_init ,
+ .ndo_uninit = bgtornet_uninit ,
+};
+#endif
+
+static unsigned int dummy_features ;
+
+static struct ctl_table bgp_tornic_table[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "features",
+ .data = &dummy_features,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ { 0 },
+} ;
+static struct ctl_path tornic_ctl_path[] = {
+ { .procname = "bgp", .ctl_name = 0, },
+ { .procname = "torusdev", .ctl_name = 0, },
+ { },
+};
+
+
+int __init
+bgtornet_module_init (void)
+{
+
+ struct bgtornet_dev *bgtornet;
+ struct net_device *dev;
+ printk (KERN_INFO "%s\n", version);
+
+ bluegene_getPersonality( &personality, sizeof(personality) );
+
+ dev = alloc_etherdev(sizeof(struct bgtornet_dev));
+ if (!dev)
+ return -ENOMEM;
+
+ static_dev = dev ;
+
+
+ bgtornet = netdev_priv(dev);
+ memset(bgtornet, 0, sizeof(*bgtornet));
+ /* The following probably need to be configurable */
+
+ bgtornet->phandle_torus = 0;
+ bgtornet->eth_mask = 0;
+ dev->dev_addr[0] = 0x00;
+ dev->dev_addr[1] = 0x80;
+ dev->dev_addr[2] = 0x47;
+ dev->dev_addr[3] = personality.Network_Config.Xcoord ;
+ dev->dev_addr[4] = personality.Network_Config.Ycoord ;
+ dev->dev_addr[5] = personality.Network_Config.Zcoord ;
+
+ bgtornet->eth_local = bgtornet->eth_mask & *(unsigned int *)&dev->dev_addr[0];
+
+#if defined(HAVE_NET_DEVICE_OPS)
+ dev->netdev_ops = &netdev_ops ;
+#else
+ dev->init = bgtornet_init;
+ dev->uninit = bgtornet_uninit;
+ dev->get_stats = bgtornet_get_stats;
+ dev->hard_start_xmit = bgtornet_start_xmit;
+ dev->change_mtu = bgtornet_change_mtu;
+#endif
+ dev->mtu = BGTORNET_DEFAULT_MTU;
+
+
+ TRACEN(k_t_napi,"netif_napi_add(dev=%p,napi=%p,poll=bgtornet_poll,weight=16)",dev,&bgtornet->napi) ;
+ netif_napi_add(dev,&bgtornet->napi,bgtornet_poll,16) ;
+ TRACEN(k_t_napi,"napi poll_list=(%p,%p) state=%lu weight=%d poll=%p dev=%p dev_list=(%p,%p)",
+ bgtornet->napi.poll_list.next,bgtornet->napi.poll_list.prev,
+ bgtornet->napi.state,bgtornet->napi.weight,bgtornet->napi.poll,
+ bgtornet->napi.dev,
+ bgtornet->napi.dev_list.next,bgtornet->napi.dev_list.prev ) ;
+ TRACEN(k_t_napi,"napi_enable(napi=%p)",&bgtornet->napi) ;
+ napi_enable(&bgtornet->napi) ;
+ TRACEN(k_t_napi,"napi poll_list=(%p,%p) state=%lu weight=%d poll=%p dev=%p dev_list=(%p,%p)",
+ bgtornet->napi.poll_list.next,bgtornet->napi.poll_list.prev,
+ bgtornet->napi.state,bgtornet->napi.weight,bgtornet->napi.poll,
+ bgtornet->napi.dev,
+ bgtornet->napi.dev_list.next,bgtornet->napi.dev_list.prev ) ;
+
+
+/* If we're trusting the torus hardware, there is no point forming an IP checksum on the send side */
+ dev->features = NETIF_F_HIGHDMA
+ | (k_trust_torus_crc ? (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM | NETIF_F_IPV6_CSUM) : 0 )
+ | (k_inhibit_scattergather ? 0 : NETIF_F_SG) ;
+
+ skb_queue_head_init(&(bgtornet->xmit_list)) ;
+
+
+ if (register_netdev(dev) != 0)
+ goto err;
+ if( k_inhibit_gso )
+ {
+ dev->features &= ~(NETIF_F_GSO) ; // scatter-gather sometimes does not get it right. Might be a problem with GSO or might be broken anyway
+ /* TODO: Isolate whether GSO is broken or whether the torus driver is broken */
+ }
+
+ bgp_tornic_table[0].data = &(dev->features) ;
+
+ register_sysctl_paths(tornic_ctl_path,bgp_tornic_table) ;
+
+ printk(KERN_INFO
+ "%s: BGNET %s, MAC %02x:%02x:%02x:%02x:%02x:%02x\n" "BGTORNET mask 0x%08x local 0x%08x\n",
+ dev->name, "np->full_name",
+ dev->dev_addr[0], dev->dev_addr[1], dev->dev_addr[2],
+ dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5],
+ bgtornet->eth_mask, bgtornet->eth_local
+ );
+
+ return 0;
+
+ err:
+ free_netdev(dev);
+ return -1;
+
+
+ return 0;
+}
+
+void __exit bgtornet_module_exit (void)
+{
+}
+
+/* module_init(bgtornet_module_init); */
+/* module_exit(bgtornet_module_exit); */
diff --git a/drivers/net/bgp_torus/bgtornic.h b/drivers/net/bgp_torus/bgtornic.h
new file mode 100644
index 00000000000000..2139081efd85cd
--- /dev/null
+++ b/drivers/net/bgp_torus/bgtornic.h
@@ -0,0 +1,126 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Volkmar Uhlig
+ * Chris Ward <tjcw@uk.ibm.com>
+ *
+ * Description: definitions for BG networks
+ *
+ *
+ ********************************************************************/
+
+#ifndef __DRIVERS__NET__BLUEGENE__BGNIC_H__
+#define __DRIVERS__NET__BLUEGENE__BGNIC_H__
+
+/* #define BG_IRQ(group, irq) ((group) << 5 | (irq)) */
+
+
+/**********************************************************************
+ * link layer
+ **********************************************************************/
+
+/* #define BGNET_P_ETH0 1 */
+/* #define BGNET_P_ETH1 2 */
+/* #define BGNET_P_ETH2 3 */
+/* #define BGNET_P_ETH3 4 */
+/* #define BGNET_P_ETH4 5 */
+/* #define BGNET_P_ETH5 6 */
+/* #define BGNET_P_ETH6 7 */
+/* #define BGNET_P_ETH7 8 */
+/* #define BGNET_P_ETH8 9 */
+/* #define BGNET_P_LAST_ETH BGNET_P_ETH8 */
+/* */
+/* #define BGNET_P_CONSOLE 20 */
+
+/* #define BGNET_FRAG_MTU 240 */
+/* When running 'dma_tcp_frames', we can have an MTU as large as we like. IP limits to 64k, though. */
+enum {
+ BGTORNET_DEFAULT_MTU = ETH_DATA_LEN ,
+ BGTORNET_MAX_MTU = 65536
+};
+#define BGNET_MAX_MTU 65536
+/* #define BGNET_MAX_MTU (BGNET_FRAG_MTU * 128) */
+/* #define BGNET_DEFAULT_MTU (BGNET_FRAG_MTU * 30 - 12) */
+/* #define BGNET_DEFAULT_MTU ETH_DATA_LEN */
+
+enum {
+ k_trust_torus_crc =
+#if defined(BGP_TORUS_IP_CHECKSUM)
+ 0
+#else
+ 1
+#endif
+ ,
+/* #if defined(CONFIG_BGP_TORUS_ADAPTIVE_ROUTING) */
+// k_trust_torus_crc = 1 , /* Whether the IP layer should trust the BGP hardware CRC on the torus network */
+/* #else */
+// k_trust_torus_crc = 1 , /* Whether the IP layer should trust the BGP hardware CRC on the torus network */
+/* #endif */
+ k_asf_frame_verifier = 0 /* Whether to try a frame verifier in the bgtornic layer */
+};
+
+
+struct bglink_hdr
+{
+ unsigned int dst_key;
+ unsigned int src_key;
+ unsigned short conn_id;
+ unsigned char this_pkt;
+ unsigned char total_pkt;
+ unsigned short lnk_proto; /* 1 eth, 2 con, 3... */
+ union {
+ unsigned short optional; /* for encapsulated protocol use */
+ struct {
+ u8 pad_head;
+ u8 pad_tail;
+ } opt_eth;
+ };
+} __attribute__((packed));
+
+
+struct bglink_proto
+{
+ unsigned short lnk_proto;
+ int (*rcv)(struct sk_buff*, struct bglink_hdr*, struct bglink_proto*);
+ void *private;
+ struct list_head list;
+};
+
+struct bgtornet_dev
+{
+ unsigned short tor_protocol;
+ unsigned int eth_mask;
+ unsigned int eth_local;
+ struct bglink_proto lnk;
+ struct net_device_stats stats;
+ u32 phandle_torus;
+ struct napi_struct napi ; /* 2.6.27-ism for NAPI poll */
+ struct sk_buff_head xmit_list ; /* List of skb's to be sent */
+};
+
+/* Map a MAC address to its 24-bit node key (low three octets = x,y,z); broadcast maps to ~0 */
+static inline unsigned int eth_to_key(char *addr)
+{
+ const unsigned char *a = (const unsigned char *)addr; /* unsigned: avoid sign-extension of octets >= 0x80 corrupting the key */
+ if (is_broadcast_ether_addr(addr))
+  return ~0U;
+ /* 'static inline' (not 'extern inline'): safe in a header under both gnu89 and C99 inline rules */
+ return ((unsigned int)a[3] << 16) | ((unsigned int)a[4] << 8) | a[5];
+}
+
+
+#endif /* !__DRIVERS__NET__BLUEGENE__BGNIC_H__ */
diff --git a/drivers/net/bgp_torus/torus.c b/drivers/net/bgp_torus/torus.c
new file mode 100644
index 00000000000000..884606fa7dd026
--- /dev/null
+++ b/drivers/net/bgp_torus/torus.c
@@ -0,0 +1,548 @@
+/*********************************************************************
+ *
+ * (C) Copyright IBM Corp. 2007,2010
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses>.
+ *
+ * Authors: Chris Ward <tjcw@uk.ibm.com>
+ * Volkmar Uhlig <vuhlig@us.ibm.com>
+ *
+ * Description: Blue Gene low-level driver for the torus network
+ *
+ ********************************************************************/
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/init.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/cdev.h>
+#include <linux/proc_fs.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/etherdevice.h>
+#include <linux/tcp.h>
+#include <linux/KernelFxLog.h>
+
+#include <net/arp.h>
+
+#include <asm/pgtable.h>
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <linux/irq.h>
+#ifdef CONFIG_PPC_MERGE
+#include <asm/prom.h>
+#include <asm/of_platform.h>
+#endif
+
+#include <asm/bgp_personality.h>
+#include <asm/bluegene.h>
+
+
+/* #include "bgnic.h" */
+/* #include "bgcol.h" */
+
+#define TORUS_DEV_NAME "bgtorus"
+#include "../bgp_network/bgp_net_traceflags.h"
+
+/* Forward declarations for the NIC and DMA-TCP sub-modules (defined in
+ * sibling files). The section annotations must match the definitions:
+ * init routines belong in .init.text (__init), teardown routines in
+ * .exit.text (__exit). The original had the exit/init annotations
+ * swapped, which provokes section-mismatch warnings and risks calling
+ * into freed init memory. */
+int __init
+bgtornet_module_init(void) ;
+int __exit
+bgtornet_module_exit(void) ;
+int __init
+dma_tcp_module_init(void) ;
+int __exit
+dma_tcp_module_cleanup(void) ;
+
+/* Module-wide state for the TCP-over-torus transmit path. */
+typedef struct {
+ struct sk_buff_head skb_list_xmit ; /* List of skb's being passed to the tasklet for sending */
+} bg_tcptorus ;
+
+/* Single static instance: the driver manages one torus device. */
+static bg_tcptorus static_torus ;
+
+/* 3-D coordinates of a destination node on the torus network. */
+typedef struct {
+ unsigned char x ;
+ unsigned char y ;
+ unsigned char z ;
+} torusTarget_t ;
+
+/* #define CONFIG_BLUEGENE_TORUS_TRACE */
+
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+/* int bgtorus_debug_tracemask=k_t_general|k_t_lowvol|k_t_irqflow|k_t_irqflow_rcv|k_t_protocol ; */
+/* int bgtorus_debug_tracemask=k_t_protocol; */
+int bgtorus_debug_tracemask=k_t_init ;
+#endif
+
+#if defined(REQUIRE_TRACE)
+#define TRACE(x...) KernelFxLog(1,x)
+#define TRACE1(x...) KernelFxLog(1,x)
+#define TRACE2(x...) KernelFxLog(1,x)
+#define TRACEN(i,x...) KernelFxLog(1,x)
+#define TRACED(x...) KernelFxLog(1,x)
+#define TRACES(x...) KernelFxLog(1,x)
+#elif defined(CONFIG_BLUEGENE_TORUS_TRACE)
+#define TRACE(x...) KernelFxLog(bgtorus_debug_tracemask & k_t_general,x)
+#define TRACE1(x...) KernelFxLog(bgtorus_debug_tracemask & k_t_lowvol,x)
+#define TRACE2(x...) KernelFxLog(bgtorus_debug_tracemask & k_t_detail,x)
+#define TRACEN(i,x...) KernelFxLog(bgtorus_debug_tracemask & (i),x)
+#define TRACED(x...) KernelFxLog(1,x)
+#define TRACES(x...) KernelFxLog(1,x)
+#else
+#define TRACE(x...)
+#define TRACE1(x...)
+#define TRACE2(x...)
+#define TRACEN(i,x...)
+#define TRACED(x...)
+#define TRACES(x...)
+#endif
+
+/* #define HAS_HOSTS */
+/* #define HAS_NICPARM */
+/* #define HAS_DRIVERPARM */
+#define HAS_TORUSDIAG
+
+/* If you need settable parameters for the tree or the NIC (for debugging), enable them here */
+#if defined(HAS_DRIVERPARM)
+static int bgtorus_driverparm ;
+#endif
+
+#if defined(HAS_NICPARM)
+extern int bgnic_driverparm ;
+#endif
+
+/* void torus_learn_host(const char *cp) ; */
+
+int bgp_dma_ethem ; /* Set externally if we want to try 'eth-em' on torus */
+
+/* #define SENDS_WITH_TASKLET */
+
+#define BGP_COL_MAJOR_NUM 120
+#define BGP_TORUS_MAJOR_NUM 121
+#define BGP_GI_MAJOR_NUM 122
+#define BGP_COL_MINOR_NUMS 2
+#define BGP_TORUS_MINOR_NUMS 2
+#define BGP_GI_MINOR_NUMS 4
+#define _BGP_UA_COL0 (0x6)
+#define _BGP_PA_COL0 (0x10000000)
+#define _BGP_UA_COL1 (0x6)
+#define _BGP_PA_COL1 (0x11000000)
+#define _BGP_UA_TORUS0 (0x6)
+#define _BGP_PA_TORUS0 (0x01140000)
+#define _BGP_UA_TORUS1 (0x6)
+#define _BGP_PA_TORUS1 (0x01150000)
+
+/*
+ * device management
+ */
+/* Per-character-device state for the user-space-mappable torus devices. */
+struct bgpnet_dev
+{
+ int major,minor; /* device major, minor */
+ unsigned long long physaddr; /* physical address */
+ /* NOTE(review): a member named 'current' collides with the kernel's
+  * 'current' macro on architectures where it is #defined; ppc32 uses a
+  * register variable so this compiles here, but rename before porting. */
+ struct task_struct* current; /* process holding device */
+ int signum; /* signal to send holding process */
+ wait_queue_head_t read_wq;
+ int read_complete;
+ void *regs; /* mapped regs (only used with col) */
+ struct semaphore sem; /* interruptible semaphore */
+ struct cdev cdev; /* container device? */
+};
+
+
+#define BGP_MAX_DEVICES 8
+static struct bgpnet_dev bgpnet_devices[BGP_MAX_DEVICES];
+static unsigned int bgpnet_num_devices = 0;
+
+
+static int bgtorus_mappable_module_init(void) ;
+
+static int bgpnet_add_device(int major, int minor, const char* name,
+ unsigned long long base, int irq,
+ irqreturn_t (*irq_handler)(int, void*));
+static int bgpnet_device_open(struct inode *inode, struct file *filp);
+static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *);
+static int bgpnet_device_release(struct inode *inode, struct file * filp);
+static int bgpnet_device_ioctl(struct inode *inode, struct file * filp,
+ unsigned int cmd, unsigned long arg);
+
+
+/* File operations for the mappable torus devices: open/mmap/ioctl/release
+ * only — read/write/poll are deliberately unimplemented (NULL). */
+static struct file_operations bgpnet_device_fops =
+{
+ .owner= THIS_MODULE,
+ .open= bgpnet_device_open,
+ .read= NULL,
+ .write= NULL,
+ .poll= NULL,
+ .ioctl= bgpnet_device_ioctl,
+ .release= bgpnet_device_release,
+ .mmap= bgpnet_device_mmap,
+};
+
+
+
+#if defined(HAS_TORUSDIAG)
+void torus_diag(int param) ; /* So we can drive a function in the torus layer to poke at things */
+#endif
+
+void bgp_dma_tcp_send_and_free( struct sk_buff *skb ) ;
+
+void bgp_dma_tcp_poll(void) ;
+
+
+int col_start_xmit(struct sk_buff *skb, struct net_device *dev) ;
+/* We have a frame which should be routable via the torus. */
+/* For code path checkout, try it via the tree ... */
+int bgtorus_start_xmit(struct sk_buff *skb, struct net_device *dev
+/* , unsigned int x, unsigned int y, unsigned int z */
+ )
+{
+ /* net_device hard_start_xmit hook for the torus interface. The skb is
+  * handed to the DMA TCP layer, which takes ownership (and eventually
+  * frees it); always returns 0, i.e. frame accepted. The tree-based
+  * fallback paths below are retained only as commented-out history. */
+/* int ethem = bgp_dma_ethem ; */
+/* TRACEN(k_t_general,"(>) %s:%d", __func__, __LINE__) ; */
+/* if( 0 == ethem ) */
+/* { */
+/* col_start_xmit(skb, dev) ; */
+/* } */
+/* else */
+/* { */
+/* struct inet_connection_sock *icskp = inet_csk(skb->sk) ; */
+/* if( ethem & 4) */
+/* { */
+/* // Feature for duplicating the frame over the tree, so we can take the torus 'through the motions' */
+/* // as we bring up various drivers */
+/* struct sk_buff *cloneskb = skb_clone(skb, GFP_ATOMIC) ; */
+/* if( cloneskb) */
+/* { */
+/* col_start_xmit(cloneskb, dev) ; */
+/* } */
+/* } */
+/* #if defined(CONFIG_BLUEGENE_TCP) */
+/* if( 1 ) */
+/* { */
+ bgp_dma_tcp_send_and_free(skb
+/* ,x,y,z */
+ ) ;
+/* */
+/* } */
+/* else */
+/* { */
+/* col_start_xmit(skb, dev) ; */
+/* } */
+/* #else */
+/* col_start_xmit(skb, dev) ; */
+/* #endif */
+/* } */
+ TRACEN(k_t_general,"(<) %s:%d", __func__, __LINE__) ;
+ return 0 ;
+}
+
+/* /proc read handler: no status text is produced yet, so signal EOF
+ * immediately and report zero bytes written into the page. */
+static int bgtorus_proc_read (char *page, char **start, off_t off,
+                              int count, int *eof, void *data)
+{
+ *eof = 1;
+ return 0;
+}
+
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE) || defined(HAS_DRIVERPARM) || defined(HAS_NICPARM) || defined(HAS_TORUSDIAG)
+/* Hex-digit decode table: '0'-'9', 'a'-'f' and 'A'-'F' map to 0x0-0xf;
+ * every other byte maps to 0xff, which terminates parsing in
+ * bgtorus_atoix() below. */
+static unsigned char xtable[256] =
+ {
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ };
+
+/* Parse an unsigned hexadecimal number starting at cp, stopping at the
+ * first non-hex-digit byte. No '0x' prefix or leading whitespace is
+ * accepted; an empty/invalid string yields 0. */
+static int bgtorus_atoix(const unsigned char *cp)
+ {
+ int result = 0 ;
+ unsigned char ecp = xtable[*cp] ;
+ while (ecp < 0x10)
+ {
+ result = (result << 4 ) | ecp ;
+ cp += 1 ;
+ ecp = xtable[*cp] ;
+ }
+ return result ;
+ }
+#endif
+
+/* /proc write handler: accepts "key=hexvalue" commands to tweak driver
+ * behaviour at runtime (driverparm=, nicparm=, tracemask=, torusdiag=),
+ * each gated on the corresponding compile-time feature. Input longer
+ * than 255 bytes is silently truncated to the local buffer size. */
+static int bgtorus_proc_write(struct file *filp, const char __user *buff, unsigned long len, void *data)
+ {
+ char proc_write_buffer[256] ;
+ unsigned long actual_len=(len<255) ? len : 255 ;
+ int rc = copy_from_user( proc_write_buffer, buff, actual_len ) ;
+ if( rc != 0 ) return -EFAULT ;
+ proc_write_buffer[actual_len] = 0 ;
+#if defined(HAS_DRIVERPARM)
+ if( 0 == strncmp(proc_write_buffer,"driverparm=",11))
+ {
+ bgtorus_driverparm=bgtorus_atoix(proc_write_buffer+11) ;
+ }
+#endif
+#if defined(HAS_NICPARM)
+ if( 0 == strncmp(proc_write_buffer,"nicparm=",8))
+ {
+ bgnic_driverparm=bgtorus_atoix(proc_write_buffer+8) ;
+ }
+#endif
+#if defined(CONFIG_BLUEGENE_TORUS_TRACE)
+ if ( 0 == strncmp(proc_write_buffer,"tracemask=",10) )
+ {
+ bgtorus_debug_tracemask = bgtorus_atoix(proc_write_buffer+10) ;
+ }
+#endif
+#if defined(HAS_TORUSDIAG)
+ if ( 0 == strncmp(proc_write_buffer,"torusdiag=",10) )
+ {
+ int diag_opcode = bgtorus_atoix(proc_write_buffer+10) ;
+ torus_diag(diag_opcode) ;
+ }
+#endif
+
+ /* Report the consumed length so the writer does not loop forever. */
+ return actual_len ;
+ }
+
+#if defined(TCP_TORUS_AVAILABLE)
+extern BGP_Personality_t tcp_bgp_personality;
+#endif
+
+
+/* Module entry point: create the /proc control file, log the node's
+ * personality (when TCP-over-torus is compiled in), then bring up the
+ * mappable char devices and the NIC/DMA sub-modules. */
+static int __init
+torus_module_init (void)
+{
+ struct proc_dir_entry *ent;
+ TRACEN(k_t_init,"torus_module_init") ;
+ /* ----------------------------------------------------- */
+ /* create /proc entry */
+ /* ----------------------------------------------------- */
+ printk(KERN_INFO "%s:%d create proc ent \n", __func__, __LINE__);
+ /* The entry carries a write handler (driverparm/tracemask/torusdiag
+  * controls), so the mode must include owner-write, not just S_IRUGO. */
+ ent = create_proc_entry("driver/" TORUS_DEV_NAME, S_IRUGO | S_IWUSR, NULL);
+ if (ent)
+ {
+ ent->nlink = 1;
+ ent->read_proc = (void *)bgtorus_proc_read;
+ ent->write_proc = (void *)bgtorus_proc_write;
+ }
+#if defined(TCP_TORUS_AVAILABLE)
+ bluegene_getPersonality( &tcp_bgp_personality, sizeof(tcp_bgp_personality) );
+ printk(KERN_NOTICE "Network_Config.Rank=%08x Network_Config.IOnodeRank=%08x\n",
+ tcp_bgp_personality.Network_Config.Rank,
+ tcp_bgp_personality.Network_Config.IOnodeRank
+ ) ;
+#endif
+ skb_queue_head_init(&static_torus.skb_list_xmit) ;
+ /* Bring up the memory-mappable version */
+ bgtorus_mappable_module_init() ;
+ /* NIC and IP driver initialisation */
+ bgtornet_module_init() ;
+ dma_tcp_module_init() ;
+ return 0 ;
+}
+
+/* Module exit: tear down in reverse order of creation. The /proc entry
+ * must be removed here — the original leaked it, leaving a stale
+ * read/write handler pointing into unloaded module text. */
+static void __exit
+torus_module_exit (void)
+{
+ TRACEN(k_t_init,"torus_module_exit") ;
+ remove_proc_entry("driver/" TORUS_DEV_NAME, NULL);
+ bgtornet_module_exit() ;
+/* dma_tcp_module_cleanup() ; */
+}
+/* Code grabbed from Rch's driver so that we can map the torus for user-space access */
+
+
+/* Register one mappable char device: claim the (major,minor) region,
+ * initialise the per-device state, map its registers when a physical
+ * address is given, and add the cdev. Returns 0 on success or a
+ * negative errno; on cdev_add failure the chrdev region is released
+ * again (the original leaked it). The 'irq'/'irq_handler' arguments
+ * are currently unused placeholders. */
+static int bgpnet_add_device(int major,
+                             int minor,
+                             const char* devname,
+                             unsigned long long physaddr,
+                             int irq,
+                             irqreturn_t (*irq_handler)(int, void *))
+{
+ int ret;
+ dev_t devno;
+ struct bgpnet_dev* dev;
+ TRACEN(k_t_init,"bgpnet_add_device devname=%s",devname) ;
+ /* Guard the fixed-size table; the original indexed one past the end
+  * if more than BGP_MAX_DEVICES devices were ever registered. */
+ if (bgpnet_num_devices >= BGP_MAX_DEVICES)
+ {
+ printk(KERN_WARNING "bgpnet: device table full, cannot add %s\n", devname);
+ return -ENOSPC;
+ }
+ dev = &bgpnet_devices[bgpnet_num_devices];
+ /* initialize struct */
+ init_MUTEX (&dev->sem);
+ dev->major = major;
+ dev->minor = minor;
+ dev->physaddr = physaddr;
+ init_waitqueue_head(&dev->read_wq);
+ dev->read_complete = 0;
+ if (physaddr) {
+ dev->regs = ioremap(physaddr, 4096);
+ }
+ devno=MKDEV(major,minor);
+
+ /* register i.e., /proc/devices */
+ ret=register_chrdev_region(devno,1,(char *)devname);
+
+ if (ret)
+ {
+ printk (KERN_WARNING "bgpnet: couldn't register device (%d,%d) register_chrdev_region err=%d\n",
+ major,minor,ret);
+ return ret;
+ }
+
+ /* add cdev */
+ cdev_init(&dev->cdev,&bgpnet_device_fops);
+ dev->cdev.owner=THIS_MODULE;
+ dev->cdev.ops=&bgpnet_device_fops;
+ ret=cdev_add(&dev->cdev,devno,1);
+ if (ret)
+ {
+ printk(KERN_WARNING "bgpnet: couldn't register device (%d,%d) cdev_add err=%d\n",
+ major,minor,ret);
+ /* Undo the region registration so a retry does not leak it. */
+ unregister_chrdev_region(devno,1);
+ return ret;
+ }
+
+ /* signal to pass to owning process, should be altered using ioctl */
+ dev->signum=-1;
+
+ bgpnet_num_devices++;
+
+ return 0;
+}
+
+
+/* open(): record the opening task as the device owner and stash the
+ * device in file private data. */
+static int bgpnet_device_open (struct inode *inode, struct file *filp)
+{
+ struct bgpnet_dev *dev=container_of(inode->i_cdev,struct bgpnet_dev,cdev);
+
+ /* NOTE(review): the semaphore is taken and released immediately, so it
+  * does not cover the ownership assignment below; it only waits out a
+  * concurrent holder. Confirm whether the assignment was intended to
+  * happen while holding dev->sem. */
+ if(down_interruptible(&dev->sem)) return -ERESTARTSYS;
+ up(&dev->sem);
+
+ dev->current=current;
+ filp->private_data = (void*) dev;
+
+ TRACE("bgpnet: device (%d,%d) opened by process \"%s\" pid %i",
+ MAJOR(inode->i_rdev), MINOR(inode->i_rdev), current->comm, current->pid);
+
+ return 0;
+}
+
+
+
+
+/* mmap(): map the device's physical register window into the caller's
+ * address space, uncached, marked as I/O memory. Returns 0 on success
+ * or -EAGAIN (also when the device has no physical address). */
+static int bgpnet_device_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ unsigned long vsize = vma->vm_end - vma->vm_start;
+ struct bgpnet_dev * device = (struct bgpnet_dev *)filp->private_data;
+ int ret = -1;
+
+ /* ------------------------------------------------------- */
+ /* set up page protection. */
+ /* ------------------------------------------------------- */
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_flags |= VM_IO;
+ vma->vm_flags |= VM_RESERVED;
+
+ /* ------------------------------------------------------- */
+ /* do the mapping */
+ /* ------------------------------------------------------- */
+
+ /* NOTE(review): vsize comes straight from the user's vma and is not
+  * clamped to the device register window, so a caller can map physical
+  * memory beyond the device — confirm whether a size check is needed. */
+ if (device->physaddr != 0)
+ ret = remap_pfn_range(vma,
+ vma->vm_start,
+ device->physaddr >> PAGE_SHIFT,
+ vsize,
+ vma->vm_page_prot);
+
+ if (ret) {
+ printk (KERN_WARNING "bgpnet: mapping of device (%d,%d) failed\n",
+ device->major, device->minor);
+ } else {
+ TRACE("bgpnet: mapped (%d,%d) to vm=%lx",
+ device->major, device->minor, vma->vm_start);
+ }
+ return ret? -EAGAIN :0;
+}
+
+/* ************************************************************************* */
+/* BG/P network: release device */
+/* ************************************************************************* */
+
+/* release(): drop the owning-task reference under the device semaphore. */
+static int bgpnet_device_release (struct inode *inode, struct file * filp)
+{
+ struct bgpnet_dev *dev = filp->private_data;
+
+ /* Serialise with other holders of the device semaphore before
+  * clearing the ownership record. */
+ if (down_interruptible(&dev->sem))
+ return -ERESTARTSYS;
+ dev->current = NULL;
+ up(&dev->sem);
+
+ TRACE("bgpnet: device (%d,%d) successfully released",
+ MAJOR(inode->i_rdev), MINOR(inode->i_rdev));
+ return 0;
+}
+
+
+/* ioctl stub: no commands are implemented yet; every request succeeds
+ * with 0. (The signum field above suggests a set-signal command was
+ * planned but never wired up.) */
+static int bgpnet_device_ioctl (struct inode *inode,
+                                struct file * filp,
+                                unsigned int cmd,
+                                unsigned long arg)
+{
+ return 0;
+}
+
+/* Register the two user-mappable torus char devices (one per torus
+ * group). The 36-bit physical address is (upper-address << 32) | PA.
+ * Registration failures are now propagated instead of being silently
+ * ignored as in the original. */
+static int bgtorus_mappable_module_init(void)
+{
+/* unsigned long long tr0, tr1; */
+ unsigned long long ts0, ts1;
+ int rc;
+
+ TRACEN(k_t_init,"bgtorus_mappable_module_init") ;
+
+/* tr0=((unsigned long long)_BGP_UA_COL0<<32) + _BGP_PA_COL0; */
+/* tr1=((unsigned long long)_BGP_UA_COL1<<32) + _BGP_PA_COL1; */
+ ts0=((unsigned long long)_BGP_UA_TORUS0<<32) + _BGP_PA_TORUS0;
+ ts1=((unsigned long long)_BGP_UA_TORUS1<<32) + _BGP_PA_TORUS1;
+
+ rc = bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 0, "bgptorus_g0", ts0, -1, NULL);
+ if (rc)
+ return rc;
+ rc = bgpnet_add_device(BGP_TORUS_MAJOR_NUM, 1, "bgptorus_g1", ts1, -1, NULL);
+ if (rc)
+ return rc;
+
+ mb();
+
+ return 0;
+
+}
+
+
+/* module_init(bgtorus_mappable_module_init); */
+
+module_init(torus_module_init);
+module_exit(torus_module_exit);
diff --git a/fs/Kconfig b/fs/Kconfig
index 93945dd0b1aed9..47927a4421a7a7 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -170,7 +170,7 @@ config TMPFS_POSIX_ACL
config HUGETLBFS
bool "HugeTLB file system support"
- depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
+ depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BLUEGENE || \
(S390 && 64BIT) || BROKEN
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
diff --git a/include/linux/KernelFxLog.h b/include/linux/KernelFxLog.h
new file mode 100644
index 00000000000000..3766013d9adc27
--- /dev/null
+++ b/include/linux/KernelFxLog.h
@@ -0,0 +1,35 @@
+#ifndef __KernelFxLogger_h__
+#define __KernelFxLogger_h__
+
+/* Return a suffix of the path name PN containing at most its last three
+ * '/'-separated components, for compact log output; LENGTH is how many
+ * characters of PN to scan. If PN holds fewer than three slashes the
+ * whole string is returned — the original fell through with i == -1
+ * and returned PN - 1, a pointer before the start of the array, which
+ * is undefined behaviour. */
+static const char * FindShortPathName(const char *PN, unsigned int length) __attribute__ ((unused)) ;
+static const char * FindShortPathName(const char *PN, unsigned int length)
+ {
+ int slashcount = 0;
+ int i;
+ for( i = (int)length-1; i >= 0 ; i-- )
+ {
+ if( PN[i] == '/' )
+ {
+ slashcount++;
+ if( slashcount == 3 )
+ break;
+ }
+ }
+ return (i < 0) ? PN : PN+i ;
+ }
+
+
+/* KernelFxLog(dbgcat, fmt, ...): when 'dbgcat' is non-zero, emit a
+ * KERN_INFO line prefixed with the caller's pid, cpu, abbreviated
+ * source path (last three components), line number and function name.
+ * Wrapped in do { } while (0) so it is safe inside if/else bodies. */
+#define KernelFxLog(dbgcat, fmt, args...) \
+ do { \
+ if(dbgcat) \
+ { \
+ static const char filename[] = __FILE__ ; \
+ printk(KERN_INFO " %5d %1X ..%20s %4d %30s() " fmt "\n", \
+ current->pid, \
+ current_thread_info()->cpu, \
+ FindShortPathName(filename,sizeof(filename)), __LINE__, __FUNCTION__, ## args); \
+ } \
+ } while (0)
+
+
+#endif
diff --git a/include/linux/alignment_histograms.h b/include/linux/alignment_histograms.h
new file mode 100644
index 00000000000000..484d1d62fd5e30
--- /dev/null
+++ b/include/linux/alignment_histograms.h
@@ -0,0 +1,38 @@
+#ifndef _LINUX_ALIGNMENT_HISTOGRAM_H
+#define _LINUX_ALIGNMENT_HISTOGRAM_H
+
+#include <linux/autoconf.h>
+
+#if defined(CONFIG_DEBUG_ALIGNMENT_HISTOGRAM)
+
+enum {
+ k_histogram_size=16
+};
+struct alignment_histogram {
+ int src_alignment_histogram_crc[k_histogram_size] ;
+ int dst_alignment_histogram_crc[k_histogram_size] ;
+ int rel_alignment_histogram_crc[k_histogram_size] ;
+ int src_alignment_histogram_copy[k_histogram_size] ;
+ int dst_alignment_histogram_copy[k_histogram_size] ;
+ int rel_alignment_histogram_copy[k_histogram_size] ;
+ int tagged[k_histogram_size] ;
+ long long int qcopybytes ;
+ long long int copybytes ;
+ long long int copybytesshort ;
+ long long int copybytesmisalign ;
+ long long int copybytesbroke ;
+ long long int crcbytes ;
+ long long int csumpartialbytes ;
+ int min_size_of_interest ;
+};
+extern struct alignment_histogram al_histogram ;
+
+#define INC_AL_HISTOGRAM(Name,Address,Size) \
+ { if((Size) >= al_histogram.min_size_of_interest) { al_histogram.Name[(Address)&(k_histogram_size-1)] += 1 ; } }
+#define AL_HISTOGRAM(Name,Index) (al_histogram.Name[(Index)&(k_histogram_size-1)])
+#else
+#define INC_AL_HISTOGRAM(Name,Address,Size)
+#define AL_HISTOGRAM(Name,Index) 0
+#endif
+
+#endif
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 9f315382610b57..a72bcaeefa77e0 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -646,7 +646,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
* for_each_cpu_and - iterate over every cpu in both masks
* @cpu: the (optionally unsigned) integer iterator
* @mask: the first cpumask pointer
- * @and: the second cpumask pointer
+ * @andmask: the second cpumask pointer
*
* This saves a temporary CPU mask in many places. It is equivalent to:
* struct cpumask tmp;
@@ -656,9 +656,9 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
*
* After the loop, cpu is >= nr_cpu_ids.
*/
-#define for_each_cpu_and(cpu, mask, and) \
+#define for_each_cpu_and(cpu, mask, andmask) \
for ((cpu) = -1; \
- (cpu) = cpumask_next_and((cpu), (mask), (and)), \
+ (cpu) = cpumask_next_and((cpu), (mask), (andmask)), \
(cpu) < nr_cpu_ids;)
#endif /* SMP */
diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h
index e576b848ce10f8..eeb3fb447c01d7 100644
--- a/include/linux/kmalloc_sizes.h
+++ b/include/linux/kmalloc_sizes.h
@@ -19,27 +19,34 @@
CACHE(32768)
CACHE(65536)
CACHE(131072)
-#if KMALLOC_MAX_SIZE >= 262144
+#if (NR_CPUS > 512) || (MAX_NUMNODES > 256) || !defined(CONFIG_MMU)
CACHE(262144)
#endif
-#if KMALLOC_MAX_SIZE >= 524288
+#ifdef CONFIG_BGP
+/* Intended for 'large' allocations of DMA buffers at boot time, because I cannot get bootmem_alloc to work */
+ CACHE(262144)
CACHE(524288)
-#endif
-#if KMALLOC_MAX_SIZE >= 1048576
CACHE(1048576)
-#endif
-#if KMALLOC_MAX_SIZE >= 2097152
CACHE(2097152)
-#endif
-#if KMALLOC_MAX_SIZE >= 4194304
CACHE(4194304)
-#endif
-#if KMALLOC_MAX_SIZE >= 8388608
+#if defined(CONFIG_HUGE_KMALLOC)
CACHE(8388608)
-#endif
-#if KMALLOC_MAX_SIZE >= 16777216
CACHE(16777216)
-#endif
-#if KMALLOC_MAX_SIZE >= 33554432
CACHE(33554432)
+ CACHE(67108864)
+ CACHE(134217728)
+ CACHE(268435456)
+ CACHE(536870912)
+#endif
#endif
+#ifndef CONFIG_MMU
+ CACHE(524288)
+ CACHE(1048576)
+#ifdef CONFIG_LARGE_ALLOCS
+ CACHE(2097152)
+ CACHE(4194304)
+ CACHE(8388608)
+ CACHE(16777216)
+ CACHE(33554432)
+#endif /* CONFIG_LARGE_ALLOCS */
+#endif /* CONFIG_MMU */
diff --git a/include/linux/resource.h b/include/linux/resource.h
index 40fc7e62608220..8e9e437a369f83 100644
--- a/include/linux/resource.h
+++ b/include/linux/resource.h
@@ -62,7 +62,11 @@ struct rlimit {
* GPG2 wants 64kB of mlocked memory, to make sure pass phrases
* and other sensitive information are never written to disk.
*/
+#if defined(CONFIG_INFINIBAND)
+#define MLOCK_LIMIT (10*1024*PAGE_SIZE)
+#else
#define MLOCK_LIMIT ((PAGE_SIZE > 64*1024) ? PAGE_SIZE : 64*1024)
+#endif
/*
* Due to binary compatibility, the actual resource numbers
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9dcf956ad18ab2..3dac14d62a96d2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -124,8 +124,13 @@ struct sk_buff_head {
struct sk_buff;
+#if defined(CONFIG_BGP)
+/* Set 'high' to give scope for ZRL 'soft Iwarp' over the BlueGene torus */
+#define MAX_SKB_FRAGS 18
+#else
/* To allow 64K frame to be packed as single skb without frag_list */
#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
+#endif
typedef struct skb_frag_struct skb_frag_t;
diff --git a/include/linux/stddef.h b/include/linux/stddef.h
index 6a40c76bdcf1a7..18b8dcf8935dac 100644
--- a/include/linux/stddef.h
+++ b/include/linux/stddef.h
@@ -12,10 +12,13 @@
#ifdef __KERNEL__
+/* C++ preprocessor has 'false' and 'true' as keywords, so the enum doesn't work */
+#if !defined(__cplusplus)
enum {
false = 0,
true = 1
};
+#endif
#undef offsetof
#ifdef __compiler_offsetof
diff --git a/include/net/tcp_hiatus.h b/include/net/tcp_hiatus.h
new file mode 100644
index 00000000000000..7b61940ac0e7fc
--- /dev/null
+++ b/include/net/tcp_hiatus.h
@@ -0,0 +1,31 @@
+#ifndef _NET_TCP_HIATUS_H
+#define _NET_TCP_HIATUS_H
+
+/*
+ * Attempt to streamline TCP. Gather statistics on tx sleeps
+ */
+enum {
+ k_tcp_launched, /* Number of frames launched */
+ k_tcp_wait_for_sndbuf,
+ k_tcp_wait_for_memory,
+ k_tcp_defer_mtu_probe,
+ k_tcp_defer_cwnd_quota,
+ k_tcp_defer_snd_wnd,
+ k_tcp_defer_nagle,
+ k_tcp_defer_should,
+ k_tcp_defer_fragment,
+ k_tcp_launch_failed,
+ k_tcp_hiatus_reasons
+};
+#if defined(CONFIG_TCP_HIATUS_COUNTS)
+extern int tcp_hiatus_counts[k_tcp_hiatus_reasons] ;
+#endif
+
+static inline void increment_tcp_hiatus_count(int X)
+{
+#if defined(CONFIG_TCP_HIATUS_COUNTS)
+ tcp_hiatus_counts[X] += 1 ;
+#endif
+}
+
+#endif
diff --git a/kernel/printk.c b/kernel/printk.c
index e3602d0755b0dd..d085a246407d64 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -49,7 +49,10 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
/* We show everything that is MORE important than this.. */
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
-#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
+/* Noisy kernel 7 */
+/* #define DEFAULT_CONSOLE_LOGLEVEL 7 */ /* anything MORE serious than KERN_DEBUG */
+/* Quiet kernel 3 */
+#define DEFAULT_CONSOLE_LOGLEVEL 3 /* KERN_ERR */
DECLARE_WAIT_QUEUE_HEAD(log_wait);
@@ -698,9 +701,10 @@ asmlinkage int vprintk(const char *fmt, va_list args)
t = cpu_clock(printk_cpu);
nanosec_rem = do_div(t, 1000000000);
- tlen = sprintf(tbuf, "[%5lu.%06lu] ",
+ tlen = sprintf(tbuf, "[%5lu.%06lu]:%x ",
(unsigned long) t,
- nanosec_rem / 1000);
+ nanosec_rem / 1000,
+ printk_cpu);
for (tp = tbuf; tp < tbuf + tlen; tp++)
emit_log_char(*tp);
@@ -713,7 +717,10 @@ asmlinkage int vprintk(const char *fmt, va_list args)
emit_log_char(*p);
if (*p == '\n')
+ {
new_text_line = 1;
+ if( p[1] == '\n' ) p++ ; /* Don't double-line-space */
+ };
}
/*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1bcf9cd4baa08a..7e0839706cb826 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -95,7 +95,8 @@ config HEADERS_CHECK
config DEBUG_SECTION_MISMATCH
bool "Enable full Section mismatch analysis"
- depends on UNDEFINED
+ depends on UNDEFINED || BLUEGENE
+ default y if BLUEGENE
# This option is on purpose disabled for now.
# It will be enabled when we are down to a resonable number
# of section mismatch warnings (< 10 for an allyesconfig build)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 76b148bcb0dcb9..8eaa92eeba7960 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -271,6 +271,7 @@
#include <net/ip.h>
#include <net/netdma.h>
#include <net/sock.h>
+#include <net/tcp_hiatus.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -292,6 +293,17 @@ atomic_t tcp_memory_allocated; /* Current allocated memory. */
EXPORT_SYMBOL(tcp_memory_allocated);
/*
+ * Statistics about the number of waits in TCP for various reasons
+ */
+#if defined(CONFIG_TCP_HIATUS_COUNTS)
+int tcp_hiatus_counts[k_tcp_hiatus_reasons] ;
+EXPORT_SYMBOL(tcp_hiatus_counts) ;
+#endif
+#if defined(CONFIG_BGP_TORUS_DIAGNOSTICS)
+int tcp_scattergather_frag_limit ;
+EXPORT_SYMBOL(tcp_scattergather_frag_limit) ;
+#endif
+/*
* Current number of TCP sockets.
*/
struct percpu_counter tcp_sockets_allocated;
@@ -306,6 +318,7 @@ struct tcp_splice_state {
unsigned int flags;
};
+
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
@@ -640,8 +653,13 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
{
struct sk_buff *skb;
+#if defined(CONFIG_BLUEGENE)
+ /* Desire to have the TCP header quadword-aligned. */
+ size = ALIGN(size, 16);
+#else
/* The TCP header must be at least 32-bit aligned. */
size = ALIGN(size, 4);
+#endif
skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp);
if (skb) {
@@ -710,10 +728,18 @@ new_segment:
i = skb_shinfo(skb)->nr_frags;
can_coalesce = skb_can_coalesce(skb, i, page, offset);
+/* #if defined(CONFIG_BGP_TORUS_DIAGNOSTICS) */
+/* // Scatter-gather in torus driver not handling well if we have more than one frag */
+/* if (!can_coalesce && ((i > tcp_scattergather_frag_limit) || (i >= MAX_SKB_FRAGS))) { */
+/* tcp_mark_push(tp, skb); */
+/* goto new_segment; */
+/* } */
+/* #else */
if (!can_coalesce && i >= MAX_SKB_FRAGS) {
tcp_mark_push(tp, skb);
goto new_segment;
}
+/* #endif */
if (!sk_wmem_schedule(sk, copy))
goto wait_for_memory;
@@ -753,8 +779,12 @@ new_segment:
continue;
wait_for_sndbuf:
+
+ increment_tcp_hiatus_count(k_tcp_wait_for_sndbuf) ;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
+
+ increment_tcp_hiatus_count(k_tcp_wait_for_memory) ;
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
@@ -999,8 +1029,10 @@ new_segment:
continue;
wait_for_sndbuf:
+ increment_tcp_hiatus_count(k_tcp_wait_for_sndbuf) ;
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
+ increment_tcp_hiatus_count(k_tcp_wait_for_memory) ;
if (copied)
tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index da2c3b8794f2b3..69e77d9e427c8c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -35,6 +35,7 @@
*/
#include <net/tcp.h>
+#include <net/tcp_hiatus.h>
#include <linux/compiler.h>
#include <linux/module.h>
@@ -59,6 +60,15 @@ int sysctl_tcp_base_mss __read_mostly = 512;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
+#if defined(CONFIG_TCP_CONGESTION_OVERRIDES)
+int sysctl_tcp_force_nodelay ;
+int sysctl_tcp_permit_cwnd ;
+int sysctl_tcp_max_cwnd = 1000 ;
+EXPORT_SYMBOL(sysctl_tcp_force_nodelay) ;
+EXPORT_SYMBOL(sysctl_tcp_permit_cwnd) ;
+EXPORT_SYMBOL(sysctl_tcp_max_cwnd) ;
+#endif
+
static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1145,6 +1155,11 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
in_flight = tcp_packets_in_flight(tp);
cwnd = tp->snd_cwnd;
+#if defined(CONFIG_TCP_CONGESTION_OVERRIDES)
+ cwnd = (cwnd < sysctl_tcp_permit_cwnd)
+ ? sysctl_tcp_permit_cwnd
+ : ( ( cwnd > sysctl_tcp_max_cwnd) ? sysctl_tcp_max_cwnd : cwnd ) ;
+#endif
if (in_flight < cwnd)
return (cwnd - in_flight);
@@ -1213,6 +1228,11 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
return 1;
+#if defined(CONFIG_TCP_CONGESTION_OVERRIDES)
+ if (sysctl_tcp_force_nodelay)
+ return 1 ;
+#endif
+
return 0;
}
@@ -1508,6 +1528,7 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
}
+
/* This routine writes packets to the network. It advances the
* send_head. This happens as incoming acks open up the remote
* window for us.
@@ -1534,6 +1555,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
/* Do MTU probing. */
result = tcp_mtu_probe(sk);
if (!result) {
+ increment_tcp_hiatus_count(k_tcp_defer_mtu_probe) ;
return 0;
} else if (result > 0) {
sent_pkts = 1;
@@ -1548,20 +1570,32 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota)
+ {
+ increment_tcp_hiatus_count(k_tcp_defer_cwnd_quota) ;
break;
+ }
if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+ {
+ increment_tcp_hiatus_count(k_tcp_defer_snd_wnd) ;
break;
+ }
if (tso_segs == 1) {
if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
(tcp_skb_is_last(sk, skb) ?
nonagle : TCP_NAGLE_PUSH))))
+ {
+ increment_tcp_hiatus_count(k_tcp_defer_nagle) ;
break;
+ }
} else {
if (!push_one && tcp_tso_should_defer(sk, skb))
+ {
+ increment_tcp_hiatus_count(k_tcp_defer_should) ;
break;
}
+ }
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
@@ -1570,13 +1604,20 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (skb->len > limit &&
unlikely(tso_fragment(sk, skb, limit, mss_now)))
+ {
+ increment_tcp_hiatus_count(k_tcp_defer_fragment) ;
break;
+ }
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
+ {
+ increment_tcp_hiatus_count(k_tcp_launch_failed) ; /* e.g. no memory when building TCP header */
break;
+ }
+ increment_tcp_hiatus_count(k_tcp_launched) ; /* Eventually, we didn't 'sleep' it. */
/* Advance the send_head. This one is sent out.
* This call will increment packets_out.
*/
diff --git a/net/socket.c b/net/socket.c
index 35dd7371752a94..2ed4918dc4b279 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1384,7 +1384,9 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
* necessary for a listen, and if that works, we mark the socket as
* ready for listening.
*/
-
+#if defined(CONFIG_BGP_TORUS)
+extern int sysctl_bgp_torus_backlog_floor ;
+#endif
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
struct socket *sock;
@@ -1396,6 +1398,10 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
if ((unsigned)backlog > somaxconn)
backlog = somaxconn;
+#if defined(CONFIG_BGP_TORUS)
+/* Apps (particularly mpich2) sometimes set 'backlog' a long way too small for cloud computing */
+ if(backlog < sysctl_bgp_torus_backlog_floor ) backlog = sysctl_bgp_torus_backlog_floor ;
+#endif
err = security_socket_listen(sock, backlog);
if (!err)